livejournal 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/ruby
2
+ #--
3
+ # ljrb -- LiveJournal Ruby module
4
+ # Copyright (c) 2005 Evan Martin <martine@danga.com>
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ # SOFTWARE.
23
+ #++
24
+
25
+
26
+ require 'livejournal/basic'
27
+ require 'cgi'
28
+ require 'net/http'
29
+ require 'date'
30
+
31
+ module LiveJournal
32
+ module Request
33
# Error type for protocol-level failures of a LiveJournal request.
# It is a RuntimeError, so a bare +rescue+ catches it.
class ProtocolException < RuntimeError
end
34
+
35
# An "ljtime" is a timestamp string of the form 2005-12-04 10:24:00.
#
# Formats +time+ (any object responding to +strftime+) as an ljtime string.
def self.time_to_ljtime(time)
  ljtime_format = '%Y-%m-%d %H:%M:%S'
  time.strftime(ljtime_format)
end
40
# Converts an ljtime string into a UTC Time.
#
# Only the date, hour and minute are parsed from +str+; the seconds and
# microseconds of the returned Time are always zero.
def self.ljtime_to_time(str)
  stamp = DateTime.strptime(str, '%Y-%m-%d %H:%M')
  year, month, day = stamp.year, stamp.mon, stamp.day
  Time.gm(year, month, day, stamp.hour, stamp.min, 0, 0)
end
45
+
46
# Thin lookup wrapper: every key passed to #[] is prefixed with +base+
# before being looked up in the wrapped +hash+.
class HashStrip #:nodoc:
  # base:: prefix prepended to every key.
  # hash:: object answering #[] with the full (prefixed) key.
  def initialize(base, hash)
    @base, @hash = base, hash
  end

  # Returns whatever the wrapped object stores under base + key.
  def [](key)
    prefixed = @base + key
    @hash[prefixed]
  end
end
56
+
57
# Superclass for all LiveJournal requests.
#
# Collects the parameters of one flat-protocol call, POSTs them to
# /interface/flat on www.livejournal.com and exposes the parsed key/value
# response through #[].
class Req #:nodoc:
  # user:: must respond to +username+, +password+ and +usejournal+.
  # mode:: sent as the "mode" request parameter.
  def initialize(user, mode)
    @user = user
    @reqparams = { "user"          => user.username,
                   "password"      => user.password,
                   "mode"          => mode,
                   "clientversion" => "Ruby",
                   "ver"           => 1 }
    @reqparams['usejournal'] = user.usejournal if user.usejournal
    # Every reader uses @result, so it must exist (empty) before #run is
    # called and after a dry run.
    @result = {}
    @verbose = false
    @dryrun = false
  end

  # Print the encoded request, HTTP debug output and the parsed response.
  def verbose!; @verbose = true; end
  # Build (and optionally print) the request but never send it.
  def dryrun!; @dryrun = true; end

  # Sets a request parameter.
  def []=(key, value)
    @reqparams[key] = value
  end

  # Reads a key of the parsed response; nil when the key is absent or no
  # response has been parsed yet.
  def [](key)
    @result[key]
  end

  # Sends the request and parses the response.
  #
  # Returns nil. Raises ProtocolException (with the response's "errmsg")
  # unless the response's "success" value is "OK". In dry-run mode nothing
  # is sent and the result stays empty.
  def run
    h = Net::HTTP.new('www.livejournal.com')
    h.set_debug_output $stderr if @verbose
    request = @reqparams.map { |key, value|
      "#{CGI.escape(key)}=#{CGI.escape(value.to_s)}"
    }.join("&")
    p request if @verbose
    return if @dryrun
    # Net::HTTP#post returned [response, body] on Ruby 1.8 but only the
    # response on later versions; taking the first element and asking it
    # for the body works on both.
    response, _legacy_body = h.post('/interface/flat', request)
    parseresponse(response.body)
    dumpresponse if @verbose
    if self['success'] != "OK"
      raise ProtocolException, self['errmsg']
    end
  end

  # Prints every response key and value, sorted by key.
  def dumpresponse
    @result.keys.sort.each { |key| puts "#{key} -> #{@result[key]}" }
  end

  protected

  # Parses a flat-protocol body: lines alternate between a key and its
  # value. A trailing key without a value maps to nil.
  def parseresponse(data)
    @result = {}
    data.to_s.split(/\r?\n/).each_slice(2) do |key, value|
      @result[key] = value
    end
  end

  # Yields one HashStrip per element of the response array named +base+,
  # i.e. for i in 1.."<base>_count" a view with prefix "<base>_<i>_".
  def each_in_array(base)
    (1..self["#{base}_count"].to_i).each do |i|
      yield HashStrip.new("#{base}_#{i}_", self)
    end
  end

  # Maps the block over each_in_array(base) and returns the results.
  def build_array(base)
    array = []
    each_in_array(base) { |x| array << yield(x) }
    array
  end
end
123
+ end
124
+ end
125
+
126
+ # vim: ts=2 sw=2 et :
@@ -0,0 +1,185 @@
1
+ #!/usr/bin/ruby
2
+ #--
3
+ # ljrb -- LiveJournal Ruby module
4
+ # Copyright (c) 2005 Evan Martin <martine@danga.com>
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ # SOFTWARE.
23
+ #++
24
+
25
+ # A full sync involves both the LiveJournal flat protocol, for entries:
26
+ require 'livejournal/request'
27
+ # As well as a custom XML format via REST, for comments:
28
+ # http://www.livejournal.com/developer/exporting.bml
29
+ require 'net/http'
30
+ require 'uri'
31
+ require 'livejournal/comments-xml'
32
+
33
+ require 'livejournal/entry'
34
+ require 'livejournal/comment'
35
+
36
+ module LiveJournal
37
+ module Request
38
# Flat-protocol "syncitems" request.
#
# After #run, +syncitems+ maps each item name from the response to its
# time, +fetched+ is the number of items that were new to that map and
# +total+ is the integer value of the response's "sync_total".
class SyncItems < Req
  attr_reader :syncitems, :fetched, :total

  # syncitems:: map to merge results into (a fresh hash when nil).
  # lastsync::  sent as the "lastsync" parameter when given.
  def initialize(user, syncitems=nil, lastsync=nil)
    super(user, 'syncitems')
    @syncitems = syncitems || {}
    self['lastsync'] = lastsync if lastsync
  end

  # Runs the request and merges previously unseen items into +syncitems+.
  # Returns the greatest time among the newly added items, or nil when
  # nothing new was added.
  def run
    super
    @fetched = 0
    @total = self['sync_total'].to_i
    newest = nil
    each_in_array('sync') do |row|
      name, stamp = row['item'], row['time']
      unless @syncitems.has_key?(name)
        @fetched += 1
        newest = stamp if newest.nil? || stamp > newest
        @syncitems[name] = stamp
      end
    end
    newest
  end

  # Picks the items named "<want_type>-<digits>" out of +syncitems+ and
  # returns a hash from the integer id to the item's time.
  def self.subset_items(syncitems, want_type='L')
    syncitems.each_with_object({}) do |(name, stamp), picked|
      next unless name =~ /^(.)-(\d+)$/
      kind = Regexp.last_match(1)
      id = Regexp.last_match(2).to_i
      picked[id] = stamp if kind == want_type
    end
  end
end
71
+
72
# Flat-protocol "sessiongenerate" request.
class SessionGenerate < Req
  def initialize(user)
    super(user, 'sessiongenerate')
  end

  # Runs the request and returns the "ljsession" value of the response.
  def run
    super
    session = self['ljsession']
    session
  end
end
81
+ end
82
+
83
+ # Journal export. A full export involves syncing both entries and comments.
84
+ # See <tt>sample/export</tt> for a full example.
85
+ module Sync
86
+
87
# To run a sync, create a Sync::Entries object, then call
# Entries#run_syncitems to fetch the sync metadata, then call
# Entries#run_sync to get the actual entries.
class Entries
  # To resume from a previous sync, pass in its lastsync value.
  attr_reader :lastsync

  def initialize(user, lastsync=nil)
    @user = user
    @logitems = {}
    @lastsync = lastsync
  end

  # Repeats SyncItems requests until as many items have been fetched as
  # the first response's total announced, yielding the running count and
  # that total after every request (when a block is given).
  #
  # Returns true when at least one 'L' item was collected for #run_sync.
  def run_syncitems # :yields: cur, total
    cur = 0
    total = nil
    items = {}
    lastsync = @lastsync
    while total.nil? || cur < total
      req = Request::SyncItems.new(@user, items, lastsync)
      lastsync = req.run
      cur += req.fetched
      total ||= req.total
      yield cur, total if block_given?
      # A round trip that added nothing new cannot make later ones succeed
      # either; stop instead of repeating the same request forever.
      break if req.fetched.zero?
    end
    @logitems = Request::SyncItems.subset_items(items, 'L')
    !@logitems.empty?
  end

  # Repeats GetEvents requests until every item collected by
  # #run_syncitems has been received, yielding each batch together with
  # the newest item time seen so far and the number of items still pending
  # (when a block is given).
  #
  # Returns nil. Does nothing when there are no pending items.
  def run_sync # :yields: entries_hash, lastsync, remaining_count
    return if @logitems.empty?

    lastsync = @lastsync
    until @logitems.empty?
      req = Request::GetEvents.new(@user, :lastsync => lastsync)
      entries = req.run
      pending_before = @logitems.size
      # pop off all items that we now have entries for
      entries.each do |itemid, _entry|
        time = @logitems.delete(itemid)
        # Entries we were not waiting for carry no sync time; skip them
        # rather than comparing nil against lastsync.
        next if time.nil?
        lastsync = time if lastsync.nil? || time > lastsync
      end
      yield entries, lastsync, @logitems.size if block_given?
      # lastsync only advances when a pending item was received, so a batch
      # that retired none would be requested again unchanged.
      break if @logitems.size == pending_before
    end
  end
end
131
+
132
# Fetches comment data page by page from /export_comments.bml on the
# user's server, authenticating with an "ljsession" cookie that is
# generated on first use.
class Comments
  def initialize(user)
    @user = user
    @session = nil
    @maxid = nil
  end

  # Performs one export_comments.bml GET and returns the response body.
  #
  # mode::  'meta' or 'body' (sent as get=comment_<mode>).
  # start:: first comment id to fetch (sent as startid).
  #
  # Raises RuntimeError when the HTTP status is not 200.
  def run_GET(mode, start)
    unless @session
      req = Request::SessionGenerate.new(@user)
      @session = req.run
    end

    path = "/export_comments.bml?get=comment_#{mode}&startid=#{start}"
    # authas: hooray for features discovered by reading source!
    path += "&authas=#{@user.usejournal}" if @user.usejournal
    url = URI.parse(@user.server.url + path)

    http = Net::HTTP.new(url.host, url.port)
    # Net::HTTP#get takes a path string. It returned [response, body] on
    # Ruby 1.8 but only the response on later versions, so take the first
    # element and read the body from it.
    resp, _legacy_body = http.get(url.request_uri,
                                  'Cookie' => "ljsession=#{@session}")
    # TODO: stream this data to the XML parser instead of buffering it all.
    if resp.code != '200'
      raise RuntimeError, "comment export request failed (HTTP #{resp.code})"
    end
    resp.body
  end
  private :run_GET

  # Fetches comment metadata pages beginning at id +start+. The first page
  # that reports a maxid fixes the upper bound; the loop ends once +start+
  # reaches it or a page contains no comments.
  #
  # Yields the highest comment id of the page, the maxid and the parser
  # object for the page.
  def run_metadata(start=0) # :yields: cur, maxid, parsed
    while @maxid.nil? || start < @maxid
      data = run_GET('meta', start)
      parsed = LiveJournal::Sync::CommentsXML::Parser.new(data)
      @maxid ||= parsed.maxid
      break if parsed.comments.empty?
      cur = parsed.comments.keys.max
      yield cur, @maxid, parsed
      start = cur + 1
    end
  end

  # Fetches comment body pages beginning at id +start+ until +start+
  # reaches the maxid or a page contains no comments.
  #
  # The maxid is only ever set by #run_metadata, so that must have run
  # first; otherwise the comparison against nil fails.
  #
  # Yields the highest comment id of the page, the maxid and the parser
  # object for the page.
  def run_body(start=0) # :yields: cur, maxid, parsed
    while start < @maxid
      data = run_GET('body', start)
      parsed = LiveJournal::Sync::CommentsXML::Parser.new(data)
      break if parsed.comments.empty?
      cur = parsed.comments.keys.max
      yield cur, @maxid, parsed
      start = cur + 1
    end
  end
end
182
+ end
183
+ end
184
+
185
+ # vim: ts=2 sw=2 et :
data/sample/export ADDED
@@ -0,0 +1,154 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'livejournal/sync'
4
+ require 'livejournal/database'
5
+ require 'optparse'
6
+
7
# Minimal text progress bar: prints "caption[    ]", moves the cursor back
# to the start of the bar with backspaces and then fills it with dots.
class ProgressBar
  def initialize(caption)
    @cur = 0
    @width = 40
    print caption
    # Draw the empty bar, then back up over the closing bracket and all
    # cells so the dots overwrite the blanks.
    print("[" + " " * @width + "]" + "\x08" * (@width + 1))
    $stdout.flush
  end

  # Extends the bar to +pos+ cells. Positions beyond the bar are clamped to
  # its width; a position at or behind the current one, or any call after
  # #finish, draws nothing.
  def fill_to(pos)
    return if @cur < 0
    pos = [pos, @width].min
    return if pos <= @cur
    print "." * (pos - @cur)
    $stdout.flush
    @cur = pos
  end

  # Sets the bar to the fraction cur/max. Does nothing while +max+ is not
  # positive (e.g. nothing to do, or total not known).
  def update(cur, max)
    return unless max && max > 0
    fill_to(@width * cur / max)
  end

  # Ends the bar's line. Unless +error+ is set the bar is filled completely
  # first. Calling it again has no effect.
  def finish(error=false)
    return unless @cur >= 0
    fill_to(@width) unless error
    puts
    @cur = -1
  end

  # Creates a bar, yields it, and finishes it: completely filled when the
  # block returns, left as it is when the block raises (the error is
  # re-raised).
  def self.with_progress(caption)
    bar = ProgressBar.new(caption)
    begin
      yield bar
    rescue
      bar.finish(true)
      raise
    else
      bar.finish(false)
    end
  end
end
46
+
47
# Command-line settings; filled in by the option handlers below.
username = nil
password = nil
usejournal = nil
dbfile = nil
get_entries = true
get_comments = true
# Block parameters are local to their block (Ruby 1.9 and later), so each
# handler has to assign the parsed value to the outer variable explicitly.
opts = OptionParser.new do |parser|
  parser.on('-u', '--user USERNAME',
            'Login username') { |value| username = value }
  parser.on('-p', '--password PASSWORD',
            'Login password') { |value| password = value }
  parser.on('-a', '--usejournal JOURNAL',
            'Journal to sync (USERNAME must have access)') { |value| usejournal = value }
  parser.on('-d', '--db FILENAME',
            'Filename for output database') { |value| dbfile = value }
  parser.on('--nocomments',
            "Don't fetch comments (only entries)") { get_comments = false }
  parser.on('--noentries',
            "Don't fetch entries (only comments)") { get_entries = false }
end
opts.parse!(ARGV)

unless username
  puts opts
  puts "ERROR: Must specify username."
  exit 1
end

unless dbfile
  puts opts
  puts "ERROR: Must specify database file."
  exit 1
end

unless password
  print "Enter password (WARNING: echoed to screen): "
  $stdout.flush
  # Read from the terminal explicitly: a bare gets would read from any
  # file named by leftover command-line arguments instead.
  password = $stdin.gets.to_s.strip
end

# Log in as USERNAME, optionally working on another journal, and open the
# output database under the name of the journal being exported.
user = LiveJournal::User.new(username, password)
user.usejournal = usejournal
db = LiveJournal::Database.new(dbfile, true)
db.username = user.usejournal || user.username
91
+
92
# Entry export: fetch the sync metadata first, then the entries themselves,
# storing each batch and the sync position it reached in the database.
if get_entries
  puts "Fetching entries..."
  lastsync = db.lastsync
  puts "Resuming from #{lastsync}." if lastsync
  sync = LiveJournal::Sync::Entries.new(user, lastsync)

  ProgressBar.with_progress("Fetching metadata: ") do |meta_bar|
    sync.run_syncitems { |done, total| meta_bar.update(done, total) }
  end

  ProgressBar.with_progress("Fetching bodies: ") do |body_bar|
    stored = 0
    sync.run_sync do |batch, synced_to, left|
      db.transaction do
        batch.each { |_itemid, entry| db.store_entry(entry) }
      end
      db.lastsync = synced_to

      stored += batch.length
      # stored + left is the number of entries this run has to fetch.
      body_bar.update(stored, left + stored)
    end
  end
end
119
+
120
# Comment export: metadata pages first, then body pages, each resuming
# one past the last comment id the database already holds (or at 0).
if get_comments
  puts "Fetching comments..."

  cs = LiveJournal::Sync::Comments.new(user)

  last_meta = db.last_comment_meta
  next_meta = last_meta ? last_meta + 1 : 0
  ProgressBar.with_progress("Fetching metadata: ") do |meta_bar|
    cs.run_metadata(next_meta) do |reached, maxid, page|
      db.store_comments_meta(page.comments)
      db.store_usermap(page.usermap)
      meta_bar.update(reached, maxid)
    end
  end

  last_full = db.last_comment_full
  next_full = last_full ? last_full + 1 : 0
  ProgressBar.with_progress("Fetching bodies: ") do |body_bar|
    cs.run_body(next_full) do |reached, maxid, page|
      body_bar.update(reached, maxid)
      db.store_comments_full(page.comments)
    end
  end
end
153
+
154
+ # vim: ts=2 sw=2 et :