abelard 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 11d17040bfbcb446c38d93ce3ddaf614a55fb7c1
4
+ data.tar.gz: b40187d7a1c261912e3e2655e7783a6e4b9915c7
5
+ SHA512:
6
+ metadata.gz: cb41b5744e105884515e1e8e8513bc6601c132a4b332e7981f6fe038df48b7ff7cb797bac6f3ac6c9a5d20d62656c1c46d7f50c361dce387b341591265086edc
7
+ data.tar.gz: be36ec1cb0beb93e410226221bbe4faacb95aed8f156aec17b2be2b8c6f4803939caa939ef01800463f1119da6c296ceafc5bedc17cbd7165e9c6074375b37e0
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Dispatch `abelard <command> [args...]` to the matching sub-command.
# The command name is shifted off ARGV, so the required script sees only
# its own arguments.
cmd = ARGV.shift

known = %w[load dump list web]

if known.include?(cmd)
  # Requiring the file runs the sub-command's top-level code.
  require "abelard/#{cmd}"
else
  $stderr.puts "Unknown command #{cmd}"
  $stderr.puts "Use one of #{known.join(', ')}"
  # Signal the failure to the calling shell instead of exiting with 0.
  exit 1
end
@@ -0,0 +1,17 @@
1
+ require 'yaml'
2
+
3
# Read-only view of the blog configuration file: a YAML mapping from blog
# name to that blog's settings.
class Archive
  # file:: path of the YAML configuration file to load
  def initialize(file)
    @configfile = file
    # An empty YAML file loads as nil/false; treat it as "no blogs configured".
    @config = YAML.load_file(file) || {}
  end

  # Returns a Directory for the "dest" setting of the named blog.
  # Raises ArgumentError when the blog is not configured or has no "dest",
  # instead of failing with a NoMethodError on nil.
  def dir(blog)
    conf = @config[blog]
    dest = conf["dest"] if conf.respond_to?(:keys)
    raise ArgumentError, "No 'dest' directory configured for #{blog}" unless dest

    Directory.new(dest)
  end

  # Names of all configured blogs.
  def available
    @config.keys
  end
end
@@ -0,0 +1,166 @@
1
+ require 'libxml'
2
+ require 'time'
3
+ require 'abelard/history'
4
+
5
+ # known namespaces for xpath search
6
# Namespace declarations passed to XPath searches, written as "prefix:uri".
NS = %w[
  atom:http://www.w3.org/2005/Atom
  dc:http://purl.org/dc/elements/1.1/
  app:http://purl.org/atom/app#
  wp:http://wordpress.org/export/1.2/
]
12
+
13
# One stored feed entry: the parsed XML document (an Atom entry or an RSS
# item) plus the file it lives in and the metadata extracted from it.
class Item
  attr_accessor :timestamp, :title, :file, :doc, :author, :status

  # xml::      parsed document; must answer find_first(xpath[, namespaces])
  # filename:: path the document is (or will be) stored at
  def initialize(xml, filename)
    @doc = xml
    @file = filename

    # Each field is looked up in the Atom location first, then the RSS one.
    published = doc.find_first("/atom:entry/atom:published", NS) ||
                doc.find_first("/item/pubDate")
    @timestamp = published ? Time.parse(published.content) : Time.new(0)

    heading = doc.find_first("/atom:entry/atom:title", NS) ||
              doc.find_first("/item/title")
    @title = heading ? heading.content : "Post"

    writer = doc.find_first("/atom:entry/atom:author/atom:name", NS) ||
             doc.find_first("/item/dc:creator", NS)
    @author = writer ? writer.content : 'abelard'

    # Everything is treated as published unless explicitly marked otherwise.
    @status = :published
    wp_status = doc.find_first("/item/wp:status", NS)
    if wp_status
      $stderr.puts("raw status #{wp_status.content}")
      case wp_status.content
      when "trash" then @status = :trash
      when "draft" then @status = :draft
      end
    end

    atom_draft = doc.find_first("/atom:entry/app:control/app:draft", NS)
    @status = :draft if atom_draft && atom_draft.content == "yes"
  end

  # Writes the document to its file as indented UTF-8 XML.
  def save
    puts("writing #{file}")
    doc.save(file, :indent => true, :encoding => LibXML::XML::Encoding::UTF_8)
  end
end
66
+
67
# A directory on disk holding one archived feed: feed.xml (and
# channel-1.xml for RSS) plus one post-*.xml file per entry. Changes are
# committed through a History object.
class Directory
  # path:: directory containing the split feed files
  def initialize(path)
    @path = path
    @base_doc = read_base_doc
    @feed_type = case @base_doc.root.name
                 when "feed" then :atom
                 when "rss" then :rss
                 else :unknown
                 end

    @git = History.new(self, path)
  end

  # Commits pending feed and post files.
  def save
    @git.commit_posts
  end

  # Parses feed.xml; for an RSS archive the channel document
  # (channel-1.xml) is returned instead.
  def read_base_doc
    feed = LibXML::XML::Parser.file("#{@path}/feed.xml").parse
    if feed.root.name == "rss"
      LibXML::XML::Parser.file("#{@path}/channel-1.xml").parse
    else
      feed
    end
  end

  def base_doc
    @base_doc ||= read_base_doc
  end

  # Yields the Item objects for the feed, oldest first, and returns the
  # collected block results.
  #
  # Items are sorted as a list rather than stored in a hash keyed by
  # timestamp, so posts sharing a timestamp (including all posts without
  # one) are no longer dropped. Ties keep the order in which the files
  # were read.
  def each
    items = []
    each_unsorted { |post, filename| items << Item.new(post, filename) }
    ordered = items.each_with_index.sort_by { |item, index| [item.timestamp, index] }
    ordered.map { |item, _index| yield item }
  end

  # Returns a hash of feed information; "title" is nil when the feed
  # document has no title element.
  def info
    inf = {}
    el = base_doc.find_first("/atom:feed/atom:title", NS) ||
         base_doc.find_first("/rss/channel/title")
    inf["title"] = el && el.content
    inf
  end

  # Builds a complete feed document with every stored post inserted.
  # Returns nil when the feed type is neither Atom nor RSS.
  def posts_feed
    feed = read_base_doc
    case @feed_type
    when :atom
      posts_feed_atom(feed)
    when :rss
      posts_feed_rss(feed)
    end
  end

  # Imports each post's root element into the collection node and
  # returns the collection.
  def insert_posts(collection)
    each do |post|
      $stderr.puts "adding #{post.file}"
      collection << collection.doc.import(post.doc.root)
    end
    collection
  end

  # Yields (parsed document, filename) for every post-*.xml file, in
  # whatever order Dir.glob returns them.
  def each_unsorted
    Dir.glob("#{@path}/post-*.xml") do |filename|
      post = LibXML::XML::Parser.file(filename).parse
      yield post, filename
    end
  end

  def posts_feed_atom(doc)
    insert_posts(doc.root)
    doc
  end

  # The rssdoc argument is not used: the channel document is parsed
  # afresh from channel-1.xml.
  def posts_feed_rss(rssdoc)
    doc = LibXML::XML::Parser.file("#{@path}/channel-1.xml").parse
    channel = doc.find_first("/rss/channel")
    insert_posts(channel)
    doc
  end

  # Returns the given repository entries ordered by the timestamp of the
  # item stored at each entry's path. Each file is parsed once; ties keep
  # their input order.
  def sort_entries(repo_entries)
    stamped = repo_entries.each_with_index.map do |e, index|
      stamp = Item.new(LibXML::XML::Parser.file(e.path).parse, e.path).timestamp
      [stamp, index, e]
    end
    stamped.sort_by { |stamp, index, _e| [stamp, index] }.map(&:last)
  end
end
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'abelard/dir.rb'
4
+
5
# Print the reassembled feed of the archive directory named by the first
# command-line argument.
archive = Directory.new(ARGV[0])
puts archive.posts_feed.to_s
@@ -0,0 +1,142 @@
1
+ # Manage a git repository representing the feed
2
+
3
+ require 'rugged'
4
+ require 'pathname'
5
+
6
# Git repository (via Rugged) that records the files of one archived feed.
class History
  # archive is a Directory, dir is a path to store in git
  #
  # An existing directory is looked up in its enclosing repository (a new
  # one is initialised when none is found); a missing directory is created
  # and initialised. Fails if dir exists but is a plain file.
  def initialize(archive, dir)
    @archive = archive
    if File.directory? dir
      begin
        repo = Rugged::Repository.discover(dir)

        repo_base = Pathname.new(repo.workdir).realpath.to_s
        real_dir = Pathname.new(dir).realpath.to_s
        raise "confused! #{repo_base} #{real_dir}" unless real_dir.start_with?(repo_base)
        # Path of the feed directory relative to the repository root
        # ("" when they are the same directory).
        @relative_root = real_dir[repo_base.length+1..-1] || ""
        $stderr.puts "#{real_dir} in #{repo_base} : #{@relative_root}"

        check_repo_clean(repo, @relative_root)
      rescue Rugged::RepositoryError
        repo = Rugged::Repository.init_at(dir)
        @relative_root = ""
      end
    elsif File.exist? dir
      fail "#{dir} exists as file"
    else
      Dir.mkdir(dir)
      repo = Rugged::Repository.init_at(dir)
      @relative_root = ""
    end
    @repo = repo
    @dir_path = dir
  end

  class Entry
    # dir_fn is the path to the file relative to the feed directory
    # git_fn is the path to the file relative to the git root
    # path is the full path

    attr_reader :git_fn, :dir_fn, :path

    # f::          file name relative to the git root
    # root::       feed directory relative to the git root ("" if same)
    # repository:: object answering workdir
    def initialize(f, root, repository)
      @git_fn = f

      @dir_fn = root.empty? ? f : f[root.length+1..-1]

      # The working directory is the git root, so the full path is built
      # from the git-relative name. Joining with dir_fn pointed at the
      # wrong file whenever the feed lived in a subdirectory of the repo.
      @path = File.join(repository.workdir, @git_fn)
    end
  end

  def entry(from_git)
    Entry.new(from_git, @relative_root, @repo)
  end

  # Returns false when any post or comment file under sub has an
  # outstanding change in the repository status, true otherwise.
  def check_repo_clean(repo, sub)
    $stderr.puts "check_repo_clean(#{repo},#{sub})"
    clean = true
    repo.status do |file, _data|
      clean = false if classify_file(sub, file) == :real
    end
    clean
  end

  # Commits every changed file of this feed, one commit per file: first
  # the feed/channel documents, then the posts and comments in timestamp
  # order. Author and commit time are taken from the item itself.
  def commit_posts
    todo = { real: [] }
    @repo.status do |file, _data|
      (todo[classify_file(@relative_root, file)] ||= []) << file
    end

    commits = 0
    (todo[:top] || []).each do |file|
      commit_file(file, "feed info")
      commits += 1
    end

    to_commit = @archive.sort_entries(todo[:real].map { |f| entry(f) })
    to_commit.each do |repo_entry|
      $stderr.puts "Adding #{repo_entry.git_fn}"
      commit_file(repo_entry.git_fn, "post")
      commits += 1
    end

    @repo.index.write if commits > 0
  end

  # Classifies a repository-relative file name as :real (post or comment),
  # :top (feed or channel document), :unknown, or :outside the feed.
  def classify_file(subdir, file)
    # normally 1 archive = 1 repo, but if you have a repo of several
    # archives, ignore file changes outside.
    # The separator is part of the match so that "blog" does not also
    # claim files under "blog2/".
    return :outside unless subdir.empty? || file.start_with?("#{subdir}/")

    filename = Pathname.new(file).basename.to_s

    return :real if filename.start_with?("post-") || filename.start_with?("comment-")
    return :top if filename.start_with?("feed") || filename.start_with?("channel")

    :unknown
  end

  private

  # Stages one file and commits it on HEAD with the given message.
  # An unborn HEAD (no commits yet) yields a root commit; previously only
  # the feed-info commits allowed for that.
  def commit_file(file, message)
    repo = @repo
    repo.index.add file
    path = entry(file).path
    item = Item.new(LibXML::XML::Parser.file(path).parse, path)

    author = {:email => "#{item.author}@example.org",
              :time => item.timestamp,
              :name => item.author}
    parents = repo.head_unborn? ? [] : [repo.head.target]

    Rugged::Commit.create(repo,
                          :author => author,
                          :message => message,
                          :committer => author,
                          :parents => parents,
                          :tree => repo.index.write_tree(repo),
                          :update_ref => "HEAD")
  end
end
142
+
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'yaml'
4
+ require 'abelard/dir.rb'
5
CONFIG_FILE = "blogfeeds.yaml"

# abelard list -d <dir>         list the posts archived in <dir>
# abelard list <config-entry>   list the posts of a configured blog
# abelard list                  list the configured blogs
case ARGV[0]
when nil
  # An empty config file loads as nil/false; treat it as no blogs.
  configs = YAML.load_file(CONFIG_FILE) || {}
  configs.each do |name, conf|
    puts("#{name}: #{Array(conf['urls']).first}")
  end
when '-h'
  $stderr.puts("abelard list -d <dir>\nabelard list <config-entry>\nabelard list\n")
else
  if ARGV[0] == '-d'
    dest = ARGV[1]
  else
    configs = YAML.load_file(CONFIG_FILE) || {}
    conf = configs[ARGV[0]]
    dest = conf && conf['dest']
  end

  # Report a missing directory / unknown entry instead of crashing on nil.
  unless dest
    $stderr.puts("No directory to list for #{ARGV[0]}")
    exit 1
  end

  Directory.new(dest).each do |item|
    printf("%s %s\n", item.timestamp.strftime("%Y-%m-%d"), item.title)
  end
end
31
+
@@ -0,0 +1,360 @@
1
+ #!/usr/bin/env ruby
2
+ require 'libxml'
3
+ require 'net/http'
4
+ require 'yaml'
5
+
6
+ require 'abelard/dir.rb'
7
+ require 'abelard/postxml.rb'
8
+
9
+ CONFIG_FILE = "blogfeeds.yaml"
10
+
11
+ Debug = true
12
+
13
+
14
# Helpers shared by the feed splitters for turning a node of a large feed
# document into a standalone document or file.
module Splitter
  # Wraps a deep copy of xmlnode in a new document and returns it as an
  # Item bound to filename.
  def item(xmlnode, filename)
    wrapper = LibXML::XML::Document.new
    wrapper.root = xmlnode.copy(true)
    Item.new(wrapper, filename)
  end

  # deprecated
  # Saves a deep copy of xmlnode straight to file as indented UTF-8 XML.
  def write_item(xmlnode, file)
    wrapper = LibXML::XML::Document.new
    wrapper.root = xmlnode.copy(true)
    puts("writing #{file}")
    wrapper.save(file, :indent => true, :encoding => LibXML::XML::Encoding::UTF_8)
  end

  # stream the document to a string and reparse it to clean up redundant namespaces
  def write_doc_clean(doc, file)
    LibXML::XML::Parser
      .string(doc.to_s, :options => LibXML::XML::Parser::Options::NSCLEAN)
      .parse
      .save(file)
  end
end
35
+
36
# Splits an RSS document into feed.xml, one channel-N.xml per channel, and
# one file per published item (plus one per approved comment).
class Splitter_rss
  include Splitter
  NS = ['wp:http://wordpress.org/export/1.2/'];

  def initialize(document, destination)
    @doc = document
    @dest = destination
  end

  # Detaches every wp:comment element from the item and returns the
  # approved ones as Items; the others are detached and discarded.
  def extract_comments(item)
    # In a wordpress export, the comments are in wp:comment elements
    basename = Post_id_rss.new(item)

    kept = []
    item.find("wp:comment", NS).each do |node|
      comment_doc = LibXML::XML::Document.new
      comment_doc.root = node.remove!
      approved = comment_doc.find_first("/wp:comment/wp:comment_approved", NS)
      author_email = comment_doc.find_first("/wp:comment/wp:comment_author_email", NS)
      author_ip = comment_doc.find_first("/wp:comment/wp:comment_author_IP", NS)
      id = comment_doc.find_first("/wp:comment/wp:comment_id", NS)

      # delete some sensitive fields
      author_email.remove! if author_email
      author_ip.remove! if author_ip

      next unless approved && approved.content == '1'

      filename = basename.as_comment(id.content)
      kept << Item.new(comment_doc, "#{@dest}/#{filename}")
    end
    kept
  end

  # Walks the children of the rss root: channel elements are split into
  # item files and a channel-N.xml without the items; anything else is
  # moved into the feed.xml skeleton.
  def split_items
    channel_count = 1
    rss = @doc.root
    @parent = LibXML::XML::Document.new
    root = LibXML::XML::Node.new(rss.name)
    @parent.root = root
    rss.attributes.each { |attr| root.attributes[attr.name] = attr.value }

    rss.children.select(&:element?).each do |channel|
      unless channel.name == "channel"
        root << channel
        next
      end

      root << channel.clone # shallow copy for feed.xml

      channelself = XmlUtil::self_link(channel)
      is_comments = (channelself =~ /comments/)

      copy = LibXML::XML::Node.new(channel.name)
      channel.attributes.each { |attr| copy.attributes[attr.name] = attr.value }
      channel.children.select(&:element?).each do |node|
        $stderr.puts(node.name)
        if node.name != "item"
          copy << node.copy(true)
        elsif node.find("wp:attachment_url", "wp:http://wordpress.org/export/1.2/").length > 0
          # attachments dont get saved as posts
          $stderr.puts("skipping attachment")
        else
          # in a wordpress export file, comments are included inside the post item
          comments = extract_comments(node)
          save(node)
          comments.each(&:save)
        end
      end

      ch_copy = root.copy(true)
      ch_copy << copy
      unless is_comments
        channel_doc = LibXML::XML::Document.new
        channel_doc.root = ch_copy
        channel_doc.save("#{@dest}/channel-#{channel_count}.xml")
      end
      channel_count += 1
    end

    @parent.save("#{@dest}/feed.xml")
  end

  # Writes the item to its own file unless its status is not :published.
  def save(node)
    filename = Post_id_rss.new(node).to_s
    new_item = item(node, "#{@dest}/#{filename}")
    unless new_item.status == :published
      $stderr.puts("skipping #{filename} as status #{new_item.status}")
      return
    end
    new_item.save
  end
end
129
+
130
# Splits an Atom feed document into feed.xml (everything except the
# entries) and one file per post or comment entry.
class Splitter_atom
  include Splitter

  def initialize(document, destination)
    @doc = document
    @dest = destination
  end

  def split_items
    feed = @doc.root

    feedself = XmlUtil::self_link(feed)

    # The feed's own link tells posts and comments feeds apart; nil means
    # unknown, in which case each entry's category decides.
    @feed_type = if feedself =~ %r{/comments/default$}
                   "comment"
                 elsif feedself =~ %r{/posts/default$}
                   "post"
                 end

    @parent = LibXML::XML::Document.new
    root = LibXML::XML::Node.new(feed.name)
    @parent.root = root
    feed.namespaces.definitions.each { |ns| LibXML::XML::Namespace.new(root, ns.prefix, ns.href) }
    feed.attributes.each { |attr| root.attributes[attr.name] = attr.value }

    feed.children.select(&:element?).each do |node|
      if node.name == "entry"
        save(node)
      else
        root << @parent.import(node)
      end
    end

    write_doc_clean(@parent, "#{@dest}/feed.xml")
  end

  # Writes an entry node to post-<n>.xml or comment-<post>-<n>.xml, with
  # the numbers taken from the segments of the entry's self link. Entries
  # of any other type are not written.
  def save(node)
    id = node.children.find { |n| n.name == "id" }
    id = id && id.content # not used further in this method

    path = XmlUtil::self_link(node)

    return unless node.name == "entry"

    category = XmlUtil::child_attribute(node, "category", "term")
    entry_type = @feed_type || (category.split('#').last if category)

    if entry_type == "post"
      postnumber = path.split('/').last
      write_item(node, "#{@dest}/post-#{postnumber}.xml")
    elsif entry_type == "comment"
      segments = path.split('/')
      postnumber = segments[-4]
      commentnumber = segments[-1]
      write_item(node, "#{@dest}/comment-#{postnumber}-#{commentnumber}.xml")
    end
  end
end
194
+
195
# Shared behaviour of feed sources: split a parsed feed into files under
# the destination directory, then save the resulting archive.
class SourceBase
  # parser::      object whose parse returns the feed document
  # destination:: directory receiving the split files
  def process(parser, destination)
    doc = parser.parse

    case doc.root.name
    when "feed"
      Splitter_atom.new(doc, destination).split_items
    when "rss"
      Splitter_rss.new(doc, destination).split_items
    else
      puts "don't know what to do with element #{doc.root.name}"
    end

    Directory.new(destination).save
  end
end

# A feed read from a local XML file.
class FileSource < SourceBase
  def initialize(filename, dest)
    @file = filename
    @dest = dest
  end

  def load
    ensure_dest(@dest)
    process(LibXML::XML::Parser.file(@file), @dest)
  end
end

# A feed fetched over HTTP from the urls of a configuration entry.
class Source < SourceBase
  # conf_in:: either a configuration hash ("dest", "urls", optionally
  #           "user" and "password") or the name of an entry in CONFIG_FILE
  def initialize(conf_in)
    @conf = if conf_in.respond_to?(:keys)
              conf_in
            else
              get_config(conf_in) || die("No config for #{conf_in}")
            end
  end

  # Fetches every configured url and processes each response into dest.
  def load
    conf = @conf
    dest = conf["dest"] || die("No 'dest' directory defined")
    urls = conf["urls"] || die("No urls defined")

    ensure_dest(dest)

    fetcher = Fetcher.new
    fetcher.user = conf["user"]
    fetcher.password = conf["password"]

    urls.each do |urlpath|
      fetcher.get(URI(urlpath)) do |response|
        write_raw(response.body, "#{dest}/raw.xml") if Debug
        process(LibXML::XML::Parser.string(response.body), dest)
      end
    end
  end

  # Every entry of the configuration file as a hash. A missing or empty
  # file counts as no configuration, so the first save_config can create it.
  def all_configs
    return {} unless File.exist?(CONFIG_FILE)

    YAML.load_file(CONFIG_FILE) || {}
  end

  def get_config(key)
    all_configs[key]
  end

  # Adds conf under key and rewrites the configuration file; dies if the
  # key is already present.
  def save_config(key, conf)
    configuration_data = all_configs
    die("Already have config for #{key}") if configuration_data.has_key?(key)

    configuration_data[key] = conf
    File.open(CONFIG_FILE, 'w') do |conf_file|
      conf_file.puts(configuration_data.to_yaml)
    end
  end

  def write_raw(data, filename)
    File.open(filename, "w") { |f| f.write(data) }
  end
end
283
+
284
# Report a fatal error and terminate with exit status 1.
# The message goes to stderr so it does not mix with regular output.
def die(error)
  $stderr.puts error
  exit 1
end
288
+
289
# Wrap an HTTP session, handle making a request and
# following any redirects
class Fetcher
  def initialize
    @host = nil
    @endpoint = nil
    @session = nil
    @user = nil
    @password = nil
  end
  attr_accessor :user, :password

  # Fetches url and yields the response when it is a 200 OK.
  #
  # url::       a URI object
  # max_depth:: how many further redirects may be followed
  #
  # Redirect responses carrying a Location header are followed; a relative
  # Location is resolved against the current url. Any other response is
  # reported on stdout and nothing is yielded.
  def get(url, max_depth=3)
    # The session is reused only for the same scheme, host and port.
    # Comparing the host alone sent an http -> https redirect on one host
    # back over the plain-HTTP session, so it could never succeed.
    endpoint = [url.scheme, url.host, url.port]
    if endpoint != @endpoint
      @endpoint = endpoint
      @host = url.host
      @session = Net::HTTP.new(@host, url.port)
      @session.use_ssl = (url.scheme == 'https')
    end
    request = Net::HTTP::Get.new(url)
    request.basic_auth(user, password) if user

    msg = "Reading (#{url})"
    msg += " as #{user}" if user
    $stderr.puts(msg)

    feedxml = @session.request(request)
    if feedxml.is_a?(Net::HTTPOK)
      yield feedxml
    elsif feedxml.is_a?(Net::HTTPRedirection) && feedxml['Location']
      new_url = URI.join(url.to_s, feedxml['Location'])
      if new_url.to_s == url.to_s
        puts("Confused! redirect to same url #{new_url}")
      elsif max_depth == 0
        puts("Too many redirects")
      else
        puts("Redirecting to #{new_url}")
        get(new_url, max_depth - 1) { |r| yield r }
      end
    else
      puts("GET returned #{feedxml.code}")
      puts(feedxml.body)
    end
  end
end
334
+
335
# Make sure the destination directory exists, creating it when absent.
# A warning is written to stderr if it still is not a directory afterwards.
def ensure_dest(dest)
  return if File.directory?(dest)

  Dir.mkdir(dest)
  $stderr.puts "Could not create directory #{dest}" unless File.directory?(dest)
end
341
+
342
# Choose the feed source from the command line, then load it.
case ARGV[0]
when nil
  $stderr.puts "Syntax:\nabelard load -f <filename> <destination>\nabelard load {-n <url>} ... <destination>\nabelard load <config>"
  exit(1)
when "-f"
  source = FileSource.new(ARGV[1], ARGV[2])
when '-n'
  # Collect every "-n <url>" pair; the argument left over is the destination.
  urls = []
  while ARGV[0] == '-n'
    ARGV.shift
    urls << ARGV.shift
  end
  conf = {"urls" => urls, "dest" => ARGV[0]}
  source = Source.new(conf)
  # The new entry is stored in the configuration under its destination name.
  source.save_config(conf["dest"], conf)
else
  source = Source.new(ARGV[0])
end
source.load
@@ -0,0 +1,80 @@
1
require 'digest'
require 'uri'
2
+
3
# Small lookups on the direct children of an XML node.
module XmlUtil
  # Content of the first child element called elementname, or nil.
  def self.child_content(node, elementname)
    el = node.children.find { |n| n.name == elementname }
    el && el.content
  end

  # Value of attribute attributename on the first child element called
  # elementname, or nil when the element or the attribute is missing.
  def self.child_attribute(node, elementname, attributename)
    el = node.children.find { |n| n.name == elementname }
    # Look up the requested attribute; this used to read "term" regardless
    # of the argument.
    attr = el && el.attributes.get_attribute(attributename)
    attr && attr.value
  end

  # Truthy when the node has the attribute and its value equals
  # attributevalue; nil when the attribute is absent.
  def self.with_attribute(node, attributename, attributevalue)
    a = node.attributes.get_attribute(attributename)
    a && (a.value == attributevalue)
  end

  # href of the node's <link rel="self"> child, or nil when there is no
  # such link or the link has no href.
  def self.self_link(node)
    el = node.children.find { |l| (l.name == "link") && with_attribute(l, "rel", "self") }
    href = el && el.attributes.get_attribute("href")
    href && href.value
  end
end
22
+
23
+
24
# Derives file names for an RSS item (and its comments) from the item's
# guid element.
class Post_id_rss
  attr_reader :idurl

  # postxml:: the item node; its guid child becomes idurl, and its string
  #           form is kept for naming items that have no guid
  def initialize(postxml)
    #XmlUtil::child_content(postxml, "post_id") ||
    @idurl = XmlUtil::child_content(postxml, "guid")
    @raw = postxml.to_s
  end

  # File name for the item: "comment-<post>-<n>.xml" when the guid looks
  # like a comment link, otherwise "post-<id>.xml".
  def to_s
    return improvise unless idurl

    commenturl = /\?p(age_id)?=(\d+)(\.xml)?#comment-(.*)$/.match(idurl) ||
                 /^(.*)\/(\d{4}\/.*)\/#(comment)-(.*)$/.match(idurl)
    if commenturl
      postnumber = commenturl[2].sub(/^\//,'').sub(/\.xml$/,'').gsub('/','-')
      commentnumber = commenturl[4]
      "comment-#{postnumber}-#{commentnumber}.xml"
    else
      "post-#{post_match}.xml"
    end
  end

  # The post number from a "?p=<n>" / "?page_id=<n>" guid, or a name
  # derived from the whole guid when it has another form.
  def post_match
    posturl = /\?p(age_id)?=(\d+)(\.xml)?$/.match(idurl)
    posturl ? posturl[2] : sanitize
  end

  def as_comment(commentnumber)
    "comment-#{post_match}-#{commentnumber}.xml"
  end

  # Turns the guid into a string usable inside a file name.
  # A guid that cannot be parsed as a URI is reported on stderr and
  # reduced to its file-name-safe characters instead of aborting the load.
  def sanitize
    begin
      uri = URI(idurl)
    rescue URI::InvalidURIError
      $stderr.puts("Could not parse url #{idurl}")
      return fallback_name
    end

    return idurl.split('-').last if uri.scheme == "tag"

    # NOTE(review): opaque URIs (e.g. urn:...) are assumed to have no
    # usable path or query, so they get the fallback name -- confirm.
    return fallback_name if uri.opaque

    build = uri.path.to_s.sub(/^\//,'').sub(/\.xml$/,'').gsub('/','-')
    build.concat('-' + uri.query.gsub(/[?&]/,'-')) if uri.query
    build.concat('-' + uri.fragment) if uri.fragment
    build
  end

  # Name for an item without a guid, derived from a digest of the item's
  # XML. String#hash is seeded per process (and may be negative), so it
  # gave the same item a different file name on every run.
  def improvise
    "post-#{Digest::SHA1.hexdigest(@raw)[0, 16]}.xml"
  end

  private

  # The guid with every run of characters that are unsafe in a file name
  # replaced by a single '-'.
  def fallback_name
    idurl.gsub(/[^A-Za-z0-9._-]+/, '-')
  end
end
80
+
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+ require 'sinatra/base'
3
+ require 'yaml'
4
+ require 'abelard/dir.rb'
5
+ require 'abelard/archive.rb'
6
+
7
+ CONFIG_FILE="blogfeeds.yaml"
8
+
9
+
10
+
11
# Web front end: an index of the configured blogs and, per blog, the
# reassembled feed of its archived posts.
class FeedServer < Sinatra::Base
  archive = Archive.new(CONFIG_FILE)
  # NOTE(review): binds to every network interface -- confirm intended.
  set :bind, "0.0.0.0"

  # Index page. Blog names and feed titles are HTML-escaped because the
  # titles are read from the archived feed documents.
  get '/' do
    template = <<ERB
<html><body><h2>Feeds</h2><dl>
<% archive.available.each do |blog| %>
<dt><%= Rack::Utils.escape_html(blog.to_s) %></dt>
<dd><%= Rack::Utils.escape_html(archive.dir(blog).info["title"].to_s) %></dd>
<dd><a href="/<%= Rack::Utils.escape_html(blog.to_s) %>/posts">posts</a></dd>
<% end %>
</dl></body></html>
ERB
    erb template, :locals => { :archive => archive }
  end

  # Feed of one blog; a name that is not configured is a 404 rather than
  # a server error.
  get '/*/posts' do |blog|
    halt 404, "Unknown feed\n" unless archive.available.include?(blog)

    headers "Content-Type" => "application/atom+xml"
    archive.dir(blog).posts_feed.to_s
  end

  run!
end
35
+
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: abelard
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Anomaly UK
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-08-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rugged
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.23'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.23'
27
+ description: Persist blogs and similar web content as sharable git repositories
28
+ email: anomalyuk@tesco.net
29
+ executables:
30
+ - abelard
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - bin/abelard
35
+ - lib/abelard/archive.rb
36
+ - lib/abelard/dir.rb
37
+ - lib/abelard/dump.rb
38
+ - lib/abelard/history.rb
39
+ - lib/abelard/list.rb
40
+ - lib/abelard/load.rb
41
+ - lib/abelard/postxml.rb
42
+ - lib/abelard/web.rb
43
+ homepage: http://anomalyuk.blogspot.com/
44
+ licenses:
45
+ - GPL-2.0
46
+ metadata: {}
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubyforge_project:
63
+ rubygems_version: 2.5.2
64
+ signing_key:
65
+ specification_version: 4
66
+ summary: Abelard blog archiver
67
+ test_files: []