vortex_client 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/examples/README.rdoc +30 -0
- data/examples/create_personpresentations.rb +147 -0
- data/examples/dice.gif +0 -0
- data/examples/import_static_site.rb +211 -0
- data/examples/no_right_margin.rb +11 -0
- data/examples/propfind_proppatch.rb +7 -0
- data/examples/replace_spike.rb +42 -0
- data/examples/scrape_hero_publications.rb +38 -0
- data/examples/scrape_holocaust.rb +351 -0
- data/examples/scrape_holocaust_related_links.rb +134 -0
- data/examples/scrape_vortex_search.rb +44 -0
- data/examples/search_replace_documents.rb +57 -0
- data/examples/search_replace_documents_frontpage.rb +88 -0
- data/examples/test_searc_replace.rb +3 -0
- data/examples/unpublish.rb +11 -0
- data/examples/upload_image.rb +8 -0
- data/examples/upload_image_flymake.rb +8 -0
- data/lib/vortex_client.rb +2 -1
- data/test/test_vortex_article_publish.rb +3 -16
- metadata +48 -17
- data/.gitignore +0 -21
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.7.0
|
@@ -0,0 +1,30 @@
|
|
1
|
+
= Vortex Client Examples
|
2
|
+
|
3
|
+
= publish_article.rb
|
4
|
+
|
5
|
+
Creates a standard article. Stores the file as JSON on the server.
|
6
|
+
|
7
|
+
= upload_file.rb
|
8
|
+
|
9
|
+
Upload local (binary) file to server.
|
10
|
+
|
11
|
+
= personer_presentation.rb
|
12
|
+
|
13
|
+
Create a person presentation that displays info from ldap and publications database.
|
14
|
+
|
15
|
+
= publish_event.rb
|
16
|
+
|
17
|
+
Publish an event with iCal link.
|
18
|
+
|
19
|
+
= sitemap.rb
|
20
|
+
|
21
|
+
Creates a file that can be used as a basis for a sitemap.
|
22
|
+
|
23
|
+
= create_collection.rb
|
24
|
+
|
25
|
+
Creates various types of directories, called collections in webdav, on server.
|
26
|
+
|
27
|
+
= unpublish.rb
|
28
|
+
|
29
|
+
Hides documents from visitors by setting each document's unpublish-date to the current time and date.
|
30
|
+
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# Create person presentations in vortex.
|
4
|
+
#
|
5
|
+
# Input is a vortex folder with images like this:
|
6
|
+
#
|
7
|
+
# /person/
|
8
|
+
# /adm/
|
9
|
+
# username.jpg
|
10
|
+
# user2.jpg
|
11
|
+
#
|
12
|
+
# For each image, the script creates a folder, a person presentation named index.html
|
13
|
+
# and moves the image file. After completion, the folder should look like this:
|
14
|
+
#
|
15
|
+
# /person/
|
16
|
+
# /adm/
|
17
|
+
# username/
|
18
|
+
# index.html
|
19
|
+
# username.jpg
|
20
|
+
# user2/
|
21
|
+
# index.html
|
22
|
+
# user2.jpg
|
23
|
+
#
|
24
|
+
# Author: Thomas Flemming thomasfl (at) usit.uio.no
|
25
|
+
|
26
|
+
require 'rubygems'
|
27
|
+
require 'vortex_client'
|
28
|
+
require 'open-uri'
|
29
|
+
require 'time'
|
30
|
+
require 'ldap_util'
|
31
|
+
|
32
|
+
# Creates the folder +new_url+ (via the local +mkdir+ helper) and marks it as
# a "person-listing" collection by patching two vrtx properties on it.
#
# A failing proppatch is reported as a warning on stdout and not re-raised.
def create_person_listing_folder(new_url)
  mkdir(new_url)
  puts "Creating folder: #{new_url}"

  listing_props = [
    '<v:collection-type xmlns:v="vrtx">person-listing</v:collection-type>',
    '<v:resourceType xmlns:v="vrtx">person-listing</v:resourceType>'
  ].join

  begin
    @vortex.proppatch(new_url, listing_props)
  rescue
    puts "Warning: problems patching folder: #{new_url}"
  end
end
|
43
|
+
|
44
|
+
# Creates the collection +url+ through the connection held in @vortex.
# Any StandardError is swallowed and reported on stdout as a warning
# (worded as "exists"), so callers never see the failure.
def mkdir(url)
  @vortex.mkdir(url)
rescue
  puts "Warning: mkdir(#{url}) exists."
end
|
51
|
+
|
52
|
+
# Copies +src+ to +dest+ through the connection held in @vortex.
#
# Any StandardError is swallowed and reported on stdout as a warning, so a
# failed copy never aborts the script. The warning names both the source and
# the destination (the source used to be printed as the literal text "src").
def copy(src, dest)
  @vortex.copy(src, dest)
rescue
  puts "Warning: cp(#{src}, #{dest}) exists."
end
|
59
|
+
|
60
|
+
# Moves +src+ to +dest+ through the connection held in @vortex.
#
# Any StandardError is swallowed and reported on stdout as a warning, so a
# failed move never aborts the script. The warning names both the source and
# the destination (the source used to be printed as the literal text "src").
def move(src, dest)
  @vortex.move(src, dest)
rescue
  puts "Warning: move(#{src}, #{dest}) exists."
end
|
67
|
+
|
68
|
+
|
69
|
+
# Deletes the resource +dest+ through the connection held in @vortex.
# A StandardError raised by the delete is reported as a warning on stdout
# and not propagated.
def delete(dest)
  @vortex.delete(dest)
rescue
  puts "Warning: delete(#{dest}) failed."
end
|
76
|
+
|
77
|
+
# Looks up the real name of +username+ with ldap_realname and, when one is
# returned, stores it as the vrtx userTitle property of the resource at +path+.
# When the lookup returns nothing, only a warning is printed.
#
# NOTE(review): the name is inserted into the XML fragment unescaped - confirm
# that ldap_realname never returns characters such as "&" or "<".
def set_realname_as_title(username, path)
  realname = ldap_realname(username)
  unless realname
    puts "Warning: Unable to get info from ldap on: #{username}"
    return
  end

  @vortex.proppatch(path, "<v:userTitle xmlns:v=\"vrtx\">#{realname}</v:userTitle>")
end
|
85
|
+
|
86
|
+
# Builds and publishes one person presentation.
#
# url::         URL of the person's image; its last path segment is kept as
#               the image file name.
# dest_folder:: URL (ending in "/") of the folder to create for the person;
#               its last path segment is used as the username.
# language::    passed through to Vortex::Person as :language.
#
# The folder is created, titled with the person's real name, the image is
# copied into it, and an index.html presentation is published there. The
# presentation is flagged as scientific when the image URL contains "/vit/".
def create_person_presentation(url, dest_folder, language)
  username = dest_folder.sub(/\/$/,'')[/([^\/]*)$/,1]
  image_name = url[/([^\/]*)$/,1]
  dest_image_url = dest_folder + image_name

  mkdir(dest_folder)
  set_realname_as_title(username, dest_folder)
  copy(url, dest_image_url)

  scientific = url.to_s.match(/\/vit\//) ? true : false

  # The default html template is used. To force the 'administrative' template
  # instead, set person.html from
  # person.create_html(:language => language, :html_template => :administrative).
  person = Vortex::Person.new(
    :user       => username,
    :image      => dest_image_url,
    :language   => language,
    :scientific => scientific,
    :url        => dest_folder + 'index.html'
  )

  @vortex.publish(person)
end
|
113
|
+
|
114
|
+
|
115
|
+
# Walks +src_url+ recursively and mirrors it under +dest_url+:
#
# * every directory becomes a person-listing folder at the corresponding
#   destination URL;
# * every .jpg/.png image (case-insensitive) becomes a person presentation
#   in a folder named after the image without its extension.
#
# Other resources are ignored.
#
# Returns the number of presentations created. Only images are counted;
# directories and skipped files used to be included in the total, which made
# the "Created N presentations" summary too high.
def create_presentations_from_images(src_url, dest_url, language)
  created = 0
  @vortex.find(src_url, :recursive => true, :suppress_errors => true) do |item|
    url = item.url.to_s
    if item.type == :directory
      create_person_listing_folder(url.gsub(src_url, dest_url))
    elsif url.match(/\.jpg$|\.png$/i)
      dest_folder = url.gsub(src_url, dest_url).sub(/\.jpg$|\.png$/i, '/')
      create_person_presentation(url, dest_folder, language)
      created += 1
    end
  end
  created
end
|
131
|
+
|
132
|
+
# src_url = 'https://nyweb3-dav.uio.no/konv/ubo/'
|
133
|
+
# Folder holding the source images; it is also the URL the connection is opened against.
src_url = 'https://nyweb1-dav.uio.no/personer/genererte-presentasjoner/econ/'
@vortex = Vortex::Connection.new(src_url, :use_osx_keychain => true)

# To restore the source folder from a backup before a run:
# puts "Restore from backup..."
# delete(src_url)
# copy('https://nyweb3-dav.uio.no/konv/ubo_backup/', src_url)

# Generate norwegian presentations next to the source folder.
dest_url = 'https://nyweb1-dav.uio.no/personer/genererte-presentasjoner/econ_generert/' ## https://nyweb3-dav.uio.no/konv/ubo_no/'
count = create_presentations_from_images(src_url, dest_url, :norwegian)
puts "\n\nDone. Created #{count} presentations."
|
143
|
+
|
144
|
+
# dest_url = 'https://nyweb3-dav.uio.no/konv/ubo_en/'
|
145
|
+
# count = create_presentations_from_images(src_url, dest_url, :english)
|
146
|
+
# puts "\n\nDone. Created " + count.to_s + " presentations."
|
147
|
+
|
data/examples/dice.gif
ADDED
Binary file
|
@@ -0,0 +1,211 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'rubygems'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'vortex_client'
|
5
|
+
require 'uri'
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'htmlentities'
|
8
|
+
require 'json'
|
9
|
+
require 'iconv'
|
10
|
+
|
11
|
+
# Migrates issues of a periodical from a static web site to a Vortex server.
#
# For every issue page the title, introduction and table of contents are
# scraped, the year/issue folders are created on the server, the page is
# republished as a structured article, and the files found in the issue's
# source folder are uploaded next to it.
class MigrateSuicidologi
  # The Vortex connection and the parsed destination URL set up by #initialize.
  attr_reader :vortex, :uri

  # url:: URL of the destination folder on the Vortex server.
  def initialize(url)
    @vortex = Vortex::Connection.new(url, :use_osx_keychain => true)
    @uri = URI.parse(url)
  end

  # Common clean-up for title and introduction: removes line breaks, trims
  # leading spaces and trailing whitespace, collapses runs of spaces and
  # decodes html entities.
  def clean_string(string)
    string = string.gsub(/\r|\n/,'').sub(/^ */,'').sub(/\s*$/,'').gsub(/ +/,' ')
    HTMLEntities.new.decode(string)
  end

  # Returns a list of all documents found below +url+, recursively.
  #
  # The page at +url+ is read as a table listing: starting at the 4th table
  # row, the first link of every row is followed. Links ending in "/" are
  # crawled as sub folders, all other links are collected.
  def crawler(url)
    result = []
    # Block form so the handle returned by open is closed after parsing.
    doc = open(url) { |io| Nokogiri::HTML.parse(io) }
    row = doc.xpath("//tr[4]").first
    while row
      link = Nokogiri::HTML(row.to_s).xpath("//a").first
      target = link && link['href'] # anchors without href are skipped
      if target
        href = url + target
        if href =~ /\/$/
          result.concat(crawler(href))
        else
          result << href
        end
      end
      row = row.next
    end
    result
  end

  # Scrapes one issue page.
  #
  # Returns a hash with the keys :title, :folder_title, :introduction, :body,
  # :year, :folder_name (both taken from the "<year>-<name>.<ext>" file name
  # in +url+) and :files (the documents found in the issue's source folder).
  def scrape_periodical(url)
    html = open(url) { |io| io.read }
    doc = parse_with_detected_encoding(html)

    issue = { }
    issue[:title] = clean_string( doc.css('.MenuHeading1').inner_text )
    issue[:folder_title] = folder_title_from(issue[:title])
    issue[:introduction] = clean_string( doc.css('.MenuHeading2').inner_text )
    issue[:body] = clean_html(doc.xpath("//ul")).to_s

    url =~ /([^\/]*)-(.*)\..*$/
    issue[:year] = $1
    issue[:folder_name] = $2
    url =~ /([^\/|]*)\.html$/
    path = 'http://www.med.uio.no/ipsy/ssff/suicidologi/' + $1 + "/"
    issue[:files] = crawler(path)
    issue
  end

  # Removes unwanted tags from the body and returns +doc+.
  #
  # * <font> tags are unwrapped (their content stays where it was);
  # * links are reduced to the last segment of their href;
  # * <br> tags inside <li> elements are removed;
  # * <p> tags inside <li> elements are unwrapped.
  #
  # All searches are relative to +doc+ (".//"). An xpath starting with "//"
  # searches the whole document even when called on a single node.
  def clean_html(doc)
    # Node#replace keeps the children at the position of the removed tag;
    # re-parenting them one by one would append them after their siblings.
    doc.xpath('.//font').each { |font| font.replace(font.children) }

    doc.xpath('.//a').each do |anchor|
      href = anchor['href']
      next unless href # named anchors have no href to shorten
      anchor['href'] = href[/([^\/]*)$/, 1]
    end

    doc.xpath('.//li//br').each { |br| br.remove }
    doc.xpath('.//li//p').each { |para| para.replace(para.children) }

    doc
  end

  # Creates the year folder and the issue folder on the server, unless they
  # already exist. Both are set up as article listings; the issue folder is
  # also given issue[:folder_title] as its title.
  def create_folders(issue)
    puts "Creating folders?"
    year_folder = @uri.path + issue[:year]
    create_article_listing(year_folder)

    issue_folder = year_folder + "/" + issue[:folder_name]
    create_article_listing(issue_folder, issue[:folder_title])
  end

  # Uploads every document in issue[:files] to the issue folder, keeping the
  # file name (last path segment) of the source URL.
  def copy_files(issue)
    puts "Copying pdf files."
    issue[:files].each do |url|
      basename = url[/([^\/]*)$/, 1]
      content = open(url) { |io| io.read }
      path = @uri.path + issue[:year] + "/" + issue[:folder_name] + "/" + basename
      puts url + " => " + path
      @vortex.put_string(path, content)
    end
  end

  # Publishes the scraped issue as index.html in the issue folder, with the
  # current time as published date. Returns the result of the publish call.
  def publish_article(issue)
    puts "Publising article"
    pathname = @uri.path + issue[:year] + "/" + issue[:folder_name] + "/index.html"
    article = Vortex::StructuredArticle.new(:title => issue[:title],
                                            :introduction => issue[:introduction],
                                            :body => issue[:body],
                                            :url => pathname,
                                            :publishedDate => Time.now)
    @vortex.publish(article)
  end

  # Scrapes the issue page at +url+ and migrates it: folders, article, files.
  #
  # debug:: when true, prints a summary of the scraped issue first
  #         (default: false).
  def migrate_issue(url, debug = false)
    issue = scrape_periodical(url)
    if debug
      puts "Year : '#{issue[:year]}'"
      puts "folder: '#{issue[:folder_name]}' / '#{issue[:folder_title]}'"
      puts "Tittel: '#{issue[:title]}'"
      puts "Intro : '#{issue[:introduction]}'"
      puts "Body : '#{issue[:body][0..110]}.."
    end

    create_folders(issue)
    publish_article(issue)
    copy_files(issue)
  end

  # Migrates every document the crawler finds below +url+ and prints each
  # document's URL once it is done.
  def migrate_all_issues(url)
    crawler(url).each do |file|
      migrate_issue(file)
      puts file
    end
  end

  private

  # Parses +html+ and guesses its encoding: iso-8859-1 is tried first, then
  # utf-8; the first one under which the serialized document contains a
  # norwegian letter wins. This method only works for norwegian text. When
  # neither matches, the iso-8859-1 document is returned.
  def parse_with_detected_encoding(html)
    latin1_doc = Nokogiri::HTML.parse(html)
    latin1_doc.encoding = 'iso-8859-1'
    if latin1_doc.to_s =~ /æ|ø|å/
      puts "Encoding detected: iso-8859-1"
      return latin1_doc
    end

    utf8_doc = Nokogiri::HTML.parse(html)
    utf8_doc.encoding = 'utf-8'
    if utf8_doc.to_s =~ /æ|ø|å/
      puts "Encoding detected: utf-8"
      return utf8_doc
    end

    puts "Encoding detected: unknown"
    latin1_doc
  end

  # Derives the folder title from an issue title: the text after the first
  # comma, cleaned and with its first character upcased. A title without a
  # comma is used as a whole, and an empty result stays empty.
  def folder_title_from(title)
    folder_title = clean_string(title[/,(.*)/, 1] || title)
    folder_title[0..0].upcase + folder_title[1..-1].to_s
  end

  # Creates +folder+ as an article listing unless it already exists, and
  # sets +title+ as its userTitle when one is given.
  def create_article_listing(folder, title = nil)
    return if @vortex.exists?(folder)

    puts " Creating folder #{folder}/"
    @vortex.mkdir(folder)
    @vortex.proppatch(folder, '<v:resourceType xmlns:v="vrtx">article-listing</v:resourceType>')
    @vortex.proppatch(folder, '<v:collection-type xmlns:v="vrtx">article-listing</v:collection-type>')
    @vortex.proppatch(folder, '<v:userTitle xmlns:v="vrtx">' + title + '</v:userTitle>') if title
  end
end
|
200
|
+
|
201
|
+
# Scrape every page listed under src_url and store the result under dest_url.
dest_url = 'https://nyweb1-dav.uio.no/konv/ssff/suicidologi/'
src_url = 'http://www.med.uio.no/ipsy/ssff/suicidologi/innholdsfortegnelser/'
MigrateSuicidologi.new(dest_url).migrate_all_issues(src_url)
|
206
|
+
|
207
|
+
|
208
|
+
# url = 'http://www.med.uio.no/ipsy/ssff/suicidologi/innholdsfortegnelser/2009-nr1.html'
|
209
|
+
# TODO
|
210
|
+
# - Sette publisert dato til år og ....?
|
211
|
+
# - Alle ingressene har tegnsettproblemer? Iconv?
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'vortex_client'
|
3
|
+
|
4
|
+
connection = Vortex::Connection.new("https://www-dav.uio.no/")

# Structured-resource property that sets hideAdditionalContent to true.
hide_additional_content =
  '<hideAdditionalContent xmlns="http://www.uio.no/vrtx/__vrtx/ns/structured-resources">true</hideAdditionalContent>'

# Patch the property on every resource found below /konv/om/profil/.
connection.find('/konv/om/profil/', :recursive => true) do |resource|
  resource.proppatch(hide_additional_content)
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'vortex_client'
|
3
|
+
|
4
|
+
dav = Vortex::Connection.new("https://www-dav.vortex-demo.uio.no/", :use_osx_keychain => true)
front_page = '/index.html'

# Read the properties of the front page (kept in props for inspection) ...
props = dav.propfind(front_page)

# ... and set its title property.
dav.proppatch(front_page, '<v:title xmlns:v="vrtx">Forside vortex demo</v:title>')
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'vortex_client'
|
3
|
+
require 'json'
|
4
|
+
require 'scrape_vortex_search'
|
5
|
+
require 'pp'
|
6
|
+
require 'uri'
|
7
|
+
|
8
|
+
# Replace text in a json document on the server.
#
# host::         URL of the server; a trailing "/" is ignored.
# path::         path or URL of the document. Only its path component is
#                used, and a trailing "/" means index.html in that folder.
# from, to::     handed to String#sub, so only the first occurrence of
#                +from+ in the raw document is replaced.
# resourcetype:: optional. When the first extra argument is a String, the
#                document is only updated if its "resourcetype" field equals
#                that string.
#
# Returns 1 if the document was updated and 0 otherwise: no document found,
# content that is not json, or a resourcetype mismatch.
def replace(host, path, from, to, *resourcetype)
  verbose = false
  host = host.sub(/\/$/, '')
  url = (host + URI.parse(path).path).sub(/\/$/, '/index.html')
  wanted_type = resourcetype[0]

  vortex = Vortex::Connection.new(host, :use_osx_keychain => true)
  vortex.find(url) do |item|
    content = nil
    data = nil
    begin
      # Read the document once and reuse the text for the replacement below.
      content = item.content
      data = JSON.parse(content)
    rescue
      puts "Warning. Bad document. Not json: " + item.uri.to_s if verbose
      return 0
    end

    if wanted_type.is_a?(String)
      # Json that is not an object (e.g. an array) has no resourcetype.
      actual_type = data.is_a?(Hash) ? data["resourcetype"] : nil
      if actual_type != wanted_type
        puts "Warning: resourcetype not: " + wanted_type if verbose
        return 0
      end
    end

    puts "Oppdaterer: " + item.uri.to_s
    item.content = content.sub(from, to)
    return 1
  end

  0 # find did not yield any document
end
|