vortex_client 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/examples/README.rdoc +30 -0
- data/examples/create_personpresentations.rb +147 -0
- data/examples/dice.gif +0 -0
- data/examples/import_static_site.rb +211 -0
- data/examples/no_right_margin.rb +11 -0
- data/examples/propfind_proppatch.rb +7 -0
- data/examples/replace_spike.rb +42 -0
- data/examples/scrape_hero_publications.rb +38 -0
- data/examples/scrape_holocaust.rb +351 -0
- data/examples/scrape_holocaust_related_links.rb +134 -0
- data/examples/scrape_vortex_search.rb +44 -0
- data/examples/search_replace_documents.rb +57 -0
- data/examples/search_replace_documents_frontpage.rb +88 -0
- data/examples/test_searc_replace.rb +3 -0
- data/examples/unpublish.rb +11 -0
- data/examples/upload_image.rb +8 -0
- data/examples/upload_image_flymake.rb +8 -0
- data/lib/vortex_client.rb +2 -1
- data/test/test_vortex_article_publish.rb +3 -16
- metadata +48 -17
- data/.gitignore +0 -21
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.7.0
|
@@ -0,0 +1,30 @@
|
|
1
|
+
= Vortex Client Examples
|
2
|
+
|
3
|
+
= publish_article.rb
|
4
|
+
|
5
|
+
Creates a standard article. Stores the file as JSON on the server.
|
6
|
+
|
7
|
+
= upload_file.rb
|
8
|
+
|
9
|
+
Upload local (binary) file to server.
|
10
|
+
|
11
|
+
= personer_presentation.rb
|
12
|
+
|
13
|
+
Create a person presentation that displays info from ldap and publications database.
|
14
|
+
|
15
|
+
= publish_event.rb
|
16
|
+
|
17
|
+
Publish an event with iCal link.
|
18
|
+
|
19
|
+
= sitemap.rb
|
20
|
+
|
21
|
+
Creates a file that can be used as the basis for a sitemap.
|
22
|
+
|
23
|
+
= create_collection.rb
|
24
|
+
|
25
|
+
Creates various types of directories, called collections in webdav, on server.
|
26
|
+
|
27
|
+
= unpublish.rb
|
28
|
+
|
29
|
+
Hides documents from visitors by setting each document's unpublish-date to the current date and time.
|
30
|
+
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# Create person presentations in vortex.
|
4
|
+
#
|
5
|
+
# Input is a vortex folder with images like this:
|
6
|
+
#
|
7
|
+
# /person/
|
8
|
+
# /adm/
|
9
|
+
# username.jpg
|
10
|
+
# user2.jpg
|
11
|
+
#
|
12
|
+
# For each image, the script creates a folder, a person presentation named index.html
|
13
|
+
# and moves the image file. After completion, the folder should look like this:
|
14
|
+
#
|
15
|
+
# /person/
|
16
|
+
# /adm/
|
17
|
+
# username/
|
18
|
+
# index.html
|
19
|
+
# username.jpg
|
20
|
+
# user2/
|
21
|
+
# index.html
|
22
|
+
# user2.jpg
|
23
|
+
#
|
24
|
+
# Author: Thomas Flemming thomasfl (at) usit.uio.no
|
25
|
+
|
26
|
+
require 'rubygems'
|
27
|
+
require 'vortex_client'
|
28
|
+
require 'open-uri'
|
29
|
+
require 'time'
|
30
|
+
require 'ldap_util'
|
31
|
+
|
32
|
+
# Creates the folder +new_url+ and marks it as a person-listing collection.
#
# The folder is created with the local +mkdir+ helper. The collection-type
# and resourceType properties are then set in one proppatch call; if that
# call raises, a warning is printed and the error is not propagated.
def create_person_listing_folder(new_url)
  mkdir(new_url)
  puts "Creating folder: " + new_url

  listing_props = [
    '<v:collection-type xmlns:v="vrtx">person-listing</v:collection-type>',
    '<v:resourceType xmlns:v="vrtx">person-listing</v:resourceType>'
  ].join

  begin
    @vortex.proppatch(new_url, listing_props)
  rescue
    puts "Warning: problems patching folder: " + new_url
  end
end
|
43
|
+
|
44
|
+
# Best-effort folder creation on the server.
#
# Any StandardError raised by the connection is reported as a warning saying
# the folder exists; it is not re-raised.
def mkdir(url)
  @vortex.mkdir(url)
rescue
  puts "Warning: mkdir(" + url + ") exists."
end
|
51
|
+
|
52
|
+
# Best-effort copy of the resource +src+ to +dest+ on the server.
#
# src  - URL of the resource to copy.
# dest - URL of the copy.
#
# Returns whatever the connection's copy returns. Any StandardError raised by
# the connection is reported as a warning naming both URLs (the cause is
# assumed to be an existing destination) and is not re-raised.
def copy(src, dest)
  @vortex.copy(src, dest)
rescue
  # Interpolate the actual source URL; the message used to contain the
  # literal text "src".
  puts "Warning: cp(#{src},#{dest}) exists."
end
|
59
|
+
|
60
|
+
# Best-effort move of the resource +src+ to +dest+ on the server.
#
# src  - URL of the resource to move.
# dest - URL it is moved to.
#
# Returns whatever the connection's move returns. Any StandardError raised by
# the connection is reported as a warning naming both URLs (the cause is
# assumed to be an existing destination) and is not re-raised.
def move(src, dest)
  @vortex.move(src, dest)
rescue
  # Interpolate the actual source URL; the message used to contain the
  # literal text "src".
  puts "Warning: move(#{src},#{dest}) exists."
end
|
67
|
+
|
68
|
+
|
69
|
+
# Best-effort removal of the resource +dest+ from the server.
#
# A StandardError raised by the connection is turned into a printed warning
# instead of being propagated.
def delete(dest)
  @vortex.delete(dest)
rescue
  puts "Warning: delete(" + dest + ") failed."
end
|
76
|
+
|
77
|
+
# Sets the userTitle property of +path+ to the real name that
# +ldap_realname+ returns for +username+.
#
# When no name is returned, a warning is printed and nothing is patched.
def set_realname_as_title(username, path)
  realname = ldap_realname(username)

  unless realname
    puts "Warning: Unable to get info from ldap on: " + username
    return
  end

  @vortex.proppatch(path, '<v:userTitle xmlns:v="vrtx">' + realname + '</v:userTitle>')
end
|
85
|
+
|
86
|
+
# Builds one person presentation from an image.
#
# url         - URL of the source image.
# dest_folder - URL (with trailing slash) of the folder to create for the
#               person; its last path segment is used as the username.
# language    - passed on to Vortex::Person as :language.
#
# Creates the folder, titles it with the person's real name, copies the
# image into it and publishes a Vortex::Person as index.html in the folder.
def create_person_presentation(url, dest_folder, language)
  last_segment = /([^\/]*)$/
  username = dest_folder.sub(/\/$/,'')[last_segment, 1]
  dest_image_url = dest_folder + url[last_segment, 1]

  mkdir(dest_folder)
  set_realname_as_title(username, dest_folder)
  copy(url, dest_image_url)

  # Images below a /vit/ path segment are flagged as scientific.
  scientific = url.to_s.match(/\/vit\//) ? true : false

  # To use the 'administrative' html template instead of the default used
  # for scientific presentations, set person.html after construction:
  #   person.html = person.create_html(:language => language, :html_template => :administrative)
  person = Vortex::Person.new(
    :user       => username,
    :image      => dest_image_url,
    :language   => language,
    :scientific => scientific,
    :url        => dest_folder + 'index.html'
  )

  @vortex.publish(person)
end
|
113
|
+
|
114
|
+
|
115
|
+
# Walks +src_url+ recursively and mirrors it under +dest_url+ as person
# presentations.
#
# src_url  - URL of the folder holding the images.
# dest_url - URL of the folder that receives the generated structure.
# language - passed on to create_person_presentation.
#
# For every directory found, a person-listing folder is created at the
# corresponding destination URL. For every .jpg/.png file (case-insensitive),
# a presentation is created in a folder named after the image without its
# extension. Other files are ignored.
#
# Returns the number of presentations created. Directories and ignored files
# are not counted (they used to be, which inflated the "Created N
# presentations" summary).
def create_presentations_from_images(src_url, dest_url, language)
  count = 0
  @vortex.find(src_url, :recursive => true, :suppress_errors => true) do |item|
    url = item.url.to_s
    if item.type == :directory
      create_person_listing_folder(url.gsub(src_url, dest_url))
    elsif url.match(/\.jpg$|\.png$/i)
      dest_folder = url.gsub(src_url, dest_url).sub(/\.jpg$|\.png$/i, '/')
      create_person_presentation(url, dest_folder, language)
      count += 1
    end
  end
  count
end
|
131
|
+
|
132
|
+
# Script entry point: generate Norwegian person presentations from the images
# found below src_url and report how many were created.

# Alternative source: 'https://nyweb3-dav.uio.no/konv/ubo/'
src_url = 'https://nyweb1-dav.uio.no/personer/genererte-presentasjoner/econ/'
@vortex = Vortex::Connection.new(src_url, :use_osx_keychain => true)

# To restore the source folder from backup first:
#   puts "Restore from backup..."
#   delete(src_url)
#   copy('https://nyweb3-dav.uio.no/konv/ubo_backup/', src_url)

# Alternative destination: 'https://nyweb3-dav.uio.no/konv/ubo_no/'
dest_url = 'https://nyweb1-dav.uio.no/personer/genererte-presentasjoner/econ_generert/'
count = create_presentations_from_images(src_url, dest_url, :norwegian)
puts "\n\nDone. Created #{count} presentations."

# English variant:
#   dest_url = 'https://nyweb3-dav.uio.no/konv/ubo_en/'
#   count = create_presentations_from_images(src_url, dest_url, :english)
#   puts "\n\nDone. Created " + count.to_s + " presentations."
|
147
|
+
|
data/examples/dice.gif
ADDED
Binary file
|
@@ -0,0 +1,211 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'rubygems'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'vortex_client'
|
5
|
+
require 'uri'
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'htmlentities'
|
8
|
+
require 'json'
|
9
|
+
require 'iconv'
|
10
|
+
|
11
|
+
# Migrates the issues of a periodical from a static web site to a Vortex
# server: each issue page is scraped, a year folder and an issue folder are
# created, the issue is published as a structured article (index.html) and
# the files belonging to the issue are copied into the issue folder.
class MigrateSuicidologi
  # vortex - the Vortex::Connection used for all writes.
  # uri    - the parsed destination URL; its path is the root of everything
  #          that is created.
  attr_reader :vortex, :uri

  # url - destination base URL on the Vortex server.
  def initialize(url)
    @vortex = Vortex::Connection.new(url, :use_osx_keychain => true)
    @uri = URI.parse(url)
  end

  # Normalizes scraped text for titles and introductions: removes line
  # breaks, strips leading spaces and trailing whitespace, collapses runs of
  # spaces and decodes html entities.
  def clean_string(string)
    string = string.gsub(/\r|\n/,'').sub(/^ */,'').sub(/\s*$/,'').gsub(/ +/,' ')
    HTMLEntities.new.decode(string)
  end

  # Returns a list of the URLs of all documents found below +url+,
  # recursively.
  #
  # Starts at the first node matching //tr[4] and follows its siblings; the
  # first link of each row is appended to +url+. Links ending in "/" are
  # crawled recursively, all others are collected.
  # NOTE(review): starting at the fourth row presumably skips the header rows
  # of a directory listing - confirm against the source site.
  # NOTE: open(url) relies on open-uri patching Kernel#open, which Ruby 3.0
  # removed; URI.open is needed there.
  def crawler(url)
    result = []
    doc = Nokogiri::HTML.parse(open(url))
    row = doc.xpath("//tr[4]").first
    while row
      link = Nokogiri::HTML(row.to_s).xpath("//a").first
      if link
        href = url + link.attribute("href").value
        if href =~ /\/$/
          result.concat(crawler(href))
        else
          result << href
        end
      end
      row = row.next
    end
    result
  end

  # Scrapes one issue page.
  #
  # url - URL of the issue page; its file name must look like
  #       "<year>-<folder name>.html".
  #
  # Returns a hash with the keys :title, :folder_title, :introduction, :body,
  # :year, :folder_name and :files.
  # Raises ArgumentError when +url+ does not have the expected form.
  def scrape_periodical(url)
    html = open(url).read
    doc = Nokogiri::HTML.parse(html)

    # Detect encoding by looking for norwegian letters in the serialized
    # document, first as iso-8859-1 and then as utf-8.
    doc.encoding = 'iso-8859-1'
    if doc.to_s =~ /æ|ø|å/
      puts "Encoding detected: iso-8859-1"
    else
      doc2 = Nokogiri::HTML.parse(html)
      doc2.encoding = 'utf-8'
      if doc2.to_s =~ /æ|ø|å/ # This method only works for norwegian
        puts "Encoding detected: utf-8"
        doc = doc2
      else
        puts "Encoding detected: unknown"
      end
    end

    issue = { }
    issue[:title] = clean_string( doc.css('.MenuHeading1').inner_text )

    # The folder title is the part of the heading after the first comma.
    # A heading without a comma is used unchanged instead of crashing on nil.
    after_comma = issue[:title][/,(.*)/, 1]
    folder_title = after_comma ? clean_string(after_comma) : issue[:title]
    issue[:folder_title] = folder_title[0..0].upcase + folder_title[1..-1].to_s

    issue[:introduction] = clean_string( doc.css('.MenuHeading2').inner_text )
    issue[:body] = clean_html(doc.xpath("//ul")).to_s

    name_match = url.match(/([^\/]*)-(.*)\..*$/)
    page_match = url.match(/([^\/|]*)\.html$/)
    unless name_match && page_match
      raise ArgumentError, "Unexpected issue url: #{url}"
    end
    issue[:year] = name_match[1]
    issue[:folder_name] = name_match[2]

    # The files of an issue are in a folder named like the issue page.
    path = 'http://www.med.uio.no/ipsy/ssff/suicidologi/' + page_match[1] + "/"
    issue[:files] = crawler(path)
    issue
  end

  # Removes unwanted tags from the body and returns +doc+.
  #
  # - font tags are replaced by their children,
  # - the href of every link is reduced to its last path segment,
  # - br tags inside li elements are removed,
  # - p tags inside li elements are replaced by their children.
  #
  # Unwrapped children are inserted where the removed tag was, so the order
  # of the content is preserved (assigning child.parent appended them at the
  # end of the parent instead).
  def clean_html(doc)
    # Remove font tags
    doc.xpath('//font').each do |font|
      font.children.each { |child| font.add_previous_sibling(child) }
      font.remove
    end

    # Remove path to links:
    doc.xpath('//a').each do |node|
      href = node.attr("href")
      next unless href # anchors without href have nothing to rewrite
      node.set_attribute("href", href[/([^\/]*)$/, 1])
    end

    # Remove <br> tags within li elements. The leading dot makes the path
    # relative to the li; '//br' selects from the whole document.
    doc.xpath('//li').each do |li|
      li.xpath('.//br').each { |br| br.remove }
    end

    # Remove <p> tags within li elements
    doc.xpath('//li').each do |li|
      li.xpath('.//p').each do |para|
        para.children.each { |child| para.add_previous_sibling(child) }
        para.remove
      end
    end

    doc
  end

  # Creates the year folder and the issue folder for +issue+ unless they
  # exist, and marks both as article-listing collections. The issue folder
  # also gets issue[:folder_title] as its userTitle.
  def create_folders(issue)
    puts "Creating folders?"
    year_folder = @uri.path + issue[:year]
    if not @vortex.exists?(year_folder)
      puts "  Creating folder #{year_folder}/"
      @vortex.mkdir(year_folder)
      @vortex.proppatch(year_folder, '<v:resourceType xmlns:v="vrtx">article-listing</v:resourceType>')
      @vortex.proppatch(year_folder, '<v:collection-type xmlns:v="vrtx">article-listing</v:collection-type>')
    end

    issue_folder = year_folder + "/" + issue[:folder_name]
    if not @vortex.exists?(issue_folder)
      puts "  Creating folder #{issue_folder}/"
      @vortex.mkdir(issue_folder)
      @vortex.proppatch(issue_folder, '<v:resourceType xmlns:v="vrtx">article-listing</v:resourceType>')
      @vortex.proppatch(issue_folder, '<v:collection-type xmlns:v="vrtx">article-listing</v:collection-type>')
      @vortex.proppatch(issue_folder, '<v:userTitle xmlns:v="vrtx">' + issue[:folder_title] + '</v:userTitle>')
    end
  end

  # Downloads every file in issue[:files] and stores it under its own name in
  # the issue folder.
  def copy_files(issue)
    puts "Copying pdf files."
    issue[:files].each do |url|
      basename = url[/([^\/]*)$/, 1]
      content = open(url).read
      path = @uri.path + issue[:year] + "/" + issue[:folder_name] + "/" + basename
      puts url + " => " + path
      @vortex.put_string(path,content)
    end
  end

  # Publishes +issue+ as a structured article named index.html in the issue
  # folder, with the current time as published date.
  #
  # Returns the result of the connection's publish call.
  def publish_article(issue)
    puts "Publising article"
    pathname = @uri.path + issue[:year] + "/" + issue[:folder_name] + "/index.html"
    article = Vortex::StructuredArticle.new(:title => issue[:title],
                                            :introduction => issue[:introduction],
                                            :body => issue[:body],
                                            :url => pathname,
                                            :publishedDate => Time.now ) #,
                                            # :author => "Halvor Aarnes")
    @vortex.publish(article)
  end

  # Migrates the single issue page at +url+: scrape, create folders, publish
  # the article and copy the files.
  def migrate_issue(url)
    issue = scrape_periodical(url)
    debug = false # set to true to print the scraped fields
    if debug
      puts "Year  : '#{issue[:year]}'"
      puts "folder: '#{issue[:folder_name]}' / '#{issue[:folder_title]}'"
      puts "Tittel: '#{issue[:title]}'"
      puts "Intro : '#{issue[:introduction]}'"
      puts "Body  : '#{issue[:body][0..110]}.."
    end
    # require 'pp'
    # pp issue[:files]
    # puts

    create_folders(issue)
    publish_article(issue)
    copy_files(issue)
  end

  # Migrates every document the crawler finds below +url+, printing each URL
  # once it has been migrated.
  def migrate_all_issues(url)
    crawler(url).each do |file|
      migrate_issue(file)
      puts file
    end
  end

end
|
200
|
+
|
201
|
+
# Script entry point: scrape every page found below src_url and store the
# result below dest_url.
dest_url = 'https://nyweb1-dav.uio.no/konv/ssff/suicidologi/'
src_url = 'http://www.med.uio.no/ipsy/ssff/suicidologi/innholdsfortegnelser/'
MigrateSuicidologi.new(dest_url).migrate_all_issues(src_url)
|
206
|
+
|
207
|
+
|
208
|
+
# url = 'http://www.med.uio.no/ipsy/ssff/suicidologi/innholdsfortegnelser/2009-nr1.html'
|
209
|
+
# TODO
|
210
|
+
# - Set the published date to the year and ....?
|
211
|
+
# - All the introductions have character set problems? Iconv?
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'vortex_client'
|
3
|
+
|
4
|
+
# Sets the hideAdditionalContent property to true on every resource found
# recursively below /konv/om/profil/.
vortex = Vortex::Connection.new("https://www-dav.uio.no/")

no_right_margin = '<hideAdditionalContent xmlns="http://www.uio.no/vrtx/__vrtx/ns/structured-resources">true</hideAdditionalContent>'

vortex.find('/konv/om/profil/', :recursive => true) { |item| item.proppatch(no_right_margin) }
|
@@ -0,0 +1,7 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'vortex_client'
|
3
|
+
|
4
|
+
# Example: read the properties of a document with propfind, then set its
# title with proppatch.
vortex = Vortex::Connection.new("https://www-dav.vortex-demo.uio.no/", :use_osx_keychain => true)
index_path = '/index.html'

# The properties are fetched for demonstration only; they are not used below.
props = vortex.propfind(index_path)

vortex.proppatch(index_path, '<v:title xmlns:v="vrtx">Forside vortex demo</v:title>')
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'vortex_client'
|
3
|
+
require 'json'
|
4
|
+
require 'scrape_vortex_search'
|
5
|
+
require 'pp'
|
6
|
+
require 'uri'
|
7
|
+
|
8
|
+
# Replaces text in a json document on the server.
#
# host         - base URL of the server; a trailing slash is ignored.
# path         - path or URL of the document. Only its path component is
#                used, and a trailing slash is expanded to "index.html".
# from, to     - pattern and replacement, passed to String#sub, so only the
#                first occurrence is replaced.
# resourcetype - optional. When the first extra argument is a String, the
#                document is only updated if its "resourcetype" field equals
#                it.
#
# Only the first item yielded by the connection's find is handled, because
# every branch of the block returns from the method.
#
# Returns 1 when the document was updated, and 0 when it was not json, had
# another resourcetype, or no document was found.
def replace(host, path, from, to, *resourcetype)
  verbose = false
  host = host.sub(/\/$/,'')
  url = (host + URI.parse(path).path).sub(/\/$/,'/index.html')
  wanted_type = resourcetype[0]

  vortex = Vortex::Connection.new(host,:use_osx_keychain => true)
  vortex.find(url) do |item|
    content = nil
    data = nil
    begin
      # Read the content once and reuse it for both parsing and replacing.
      content = item.content
      data = JSON.parse(content)
    rescue
      puts "Warning. Bad document. Not json: " + item.uri.to_s if verbose
      return 0
    end

    if wanted_type.is_a?(String) && data["resourcetype"] != wanted_type
      puts "Warning: resourcetype not: " + wanted_type if verbose
      return 0
    end

    puts "Oppdaterer: " + item.uri.to_s
    item.content = content.sub(from,to)
    return 1
  end

  # Nothing was yielded by find: report "not updated" as an Integer as well.
  0
end
|