livejournal 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/ruby
2
+ #--
3
+ # ljrb -- LiveJournal Ruby module
4
+ # Copyright (c) 2005 Evan Martin <martine@danga.com>
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ # SOFTWARE.
23
+ #++
24
+
25
+
26
+ require 'livejournal/basic'
27
+ require 'cgi'
28
+ require 'net/http'
29
+ require 'date'
30
+
31
+ module LiveJournal
32
+ module Request
33
# Raised by Req#run when the parsed reply's "success" value is not "OK".
class ProtocolException < RuntimeError
end
34
+
35
+ # ljtimes look like 2005-12-04 10:24:00.
36
+ # Convert a Time to an ljtime.
37
# Render a Time as an ljtime string, e.g. "2005-12-04 10:24:00".
def self.time_to_ljtime(time)
  ljtime_format = '%Y-%m-%d %H:%M:%S'
  time.strftime(ljtime_format)
end
40
+ # Convert an ljtime to a Time.
41
# Convert an ljtime to a Time in UTC.
#
# Both "YYYY-MM-DD HH:MM" and "YYYY-MM-DD HH:MM:SS" are accepted.  A seconds
# field is parsed (and therefore validated) but not carried over: the
# returned Time always has its seconds set to zero, as before.
#
# Raises ArgumentError (via DateTime.strptime) when +str+ does not match.
def self.ljtime_to_time(str)
  # Pick the format that covers the whole string instead of relying on
  # strptime to tolerate an unparsed trailing ":SS".
  format = str.count(':') > 1 ? '%Y-%m-%d %H:%M:%S' : '%Y-%m-%d %H:%M'
  dt = DateTime.strptime(str, format)
  Time.gm(dt.year, dt.mon, dt.day, dt.hour, dt.min, 0, 0)
end
45
+
46
+ # wrapper around a given hash, prefixing all key lookups with base
47
# Read-only view over a hash-like object: every key passed to #[] is
# prefixed with +base+ before being looked up in the wrapped object.
class HashStrip #:nodoc:
  def initialize(base, hash)
    @base, @hash = base, hash
  end

  # Look up base + key in the wrapped object.
  def [](key)
    full_key = @base + key
    @hash[full_key]
  end
end
56
+
57
+ # Superclass for all LiveJournal requests.
58
# Superclass for all LiveJournal requests.
#
# A request is a set of key/value parameters that #run POSTs to
# /interface/flat on www.livejournal.com.  The reply is parsed into a hash
# of result keys.  Subclasses pass their protocol mode to #initialize, add
# parameters with #[]= and read results with #[] once #run has returned.
class Req #:nodoc:
  # user:: object providing +username+, +password+ and +usejournal+
  # mode:: value sent as the "mode" request parameter
  def initialize(user, mode)
    @user = user
    @reqparams = { "user"          => user.username,
                   "password"      => user.password,
                   "mode"          => mode,
                   "clientversion" => "Ruby",
                   "ver"           => 1 }
    @reqparams['usejournal'] = user.usejournal if user.usejournal
    # Start with an empty result set so that #[] and #dumpresponse are safe
    # before #run has been called and after a dry run.
    @result = {}
    @verbose = false
    @dryrun = false
  end

  # Trace the HTTP exchange on $stderr and print the request and the parsed
  # response.
  def verbose!; @verbose = true; end

  # Build (and, if verbose, print) the request, but do not send it.
  def dryrun!; @dryrun = true; end

  # Set a request parameter.
  def []=(key, value)
    @reqparams[key] = value
  end

  # Look up a key in the parsed response; nil when the key is absent.
  def [](key)
    @result[key]
  end

  # Send the request and parse the reply.
  #
  # Returns nil.  Raises ProtocolException when the reply's "success" value
  # is not "OK"; the message is the reply's "errmsg" when there is one.
  def run
    http = Net::HTTP.new('www.livejournal.com')
    http.set_debug_output $stderr if @verbose
    request = @reqparams.map { |key, value|
      "#{CGI.escape(key)}=#{CGI.escape(value.to_s)}"
    }.join("&")
    p request if @verbose
    return if @dryrun
    # Net::HTTP#post returned [response, body] on Ruby 1.8 and returns only
    # the response on later versions; taking the first element of the
    # assignment and reading #body works with both.
    response, = http.post('/interface/flat', request)
    parseresponse(response.body)
    dumpresponse if @verbose
    unless self['success'] == "OK"
      raise ProtocolException,
            self['errmsg'] || "request failed (HTTP #{response.code})"
    end
  end

  # Print every key of the parsed response, sorted, as "key -> value".
  def dumpresponse
    @result.keys.sort.each { |key| puts "#{key} -> #{@result[key]}" }
  end

  protected

  # Parse a reply body made of alternating key and value lines into @result.
  # A nil body yields an empty result; a trailing key without a value line
  # maps to nil.
  def parseresponse(data)
    @result = {}
    data.to_s.split(/\r?\n/).each_slice(2) do |key, value|
      @result[key] = value
    end
  end

  # Yield one HashStrip per element of the response array named +base+:
  # "<base>_count" gives the element count, and element i is read through
  # the key prefix "<base>_<i>_" (1-based).
  def each_in_array(base)
    count = self["#{base}_count"].to_i
    1.upto(count) do |i|
      yield HashStrip.new("#{base}_#{i}_", self)
    end
  end

  # Collect the block's value for every element of the response array
  # named +base+.
  def build_array(base)
    array = []
    each_in_array(base) { |x| array << yield(x) }
    array
  end
end
123
+ end
124
+ end
125
+
126
+ # vim: ts=2 sw=2 et :
@@ -0,0 +1,185 @@
1
+ #!/usr/bin/ruby
2
+ #--
3
+ # ljrb -- LiveJournal Ruby module
4
+ # Copyright (c) 2005 Evan Martin <martine@danga.com>
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ # SOFTWARE.
23
+ #++
24
+
25
+ # A full sync involves both the LiveJournal flat protocol, for entries:
26
+ require 'livejournal/request'
27
+ # As well as a custom XML format via REST, for comments:
28
+ # http://www.livejournal.com/developer/exporting.bml
29
+ require 'net/http'
30
+ require 'uri'
31
+ require 'livejournal/comments-xml'
32
+
33
+ require 'livejournal/entry'
34
+ require 'livejournal/comment'
35
+
36
+ module LiveJournal
37
+ module Request
38
# Request for the "syncitems" mode.
#
# #run merges the items of the reply's "sync" array into +syncitems+ (a hash
# of item name => time string) and counts how many of them were new.
class SyncItems < Req
  attr_reader :syncitems, :fetched, :total

  # syncitems:: hash to merge results into (a fresh one when nil)
  # lastsync::  sent as the "lastsync" request parameter when given
  def initialize(user, syncitems=nil, lastsync=nil)
    super(user, 'syncitems')
    @syncitems = syncitems || {}
    self['lastsync'] = lastsync if lastsync
  end

  # Run the request.  Returns the greatest time among the newly recorded
  # items, or nil when the reply added nothing.
  def run
    super
    @fetched = 0
    @total = self['sync_total'].to_i
    newest = nil
    each_in_array('sync') do |entry|
      name = entry['item']
      stamp = entry['time']
      unless @syncitems.has_key?(name)
        @fetched += 1
        newest = stamp if newest.nil? || stamp > newest
        @syncitems[name] = stamp
      end
    end
    newest
  end

  # From a hash of "<type>-<id>" => time, return { id (Integer) => time } for
  # the items whose one-character type equals +want_type+.
  def self.subset_items(syncitems, want_type='L')
    picked = {}
    syncitems.each_pair do |name, stamp|
      if name =~ /^(.)-(\d+)$/ && Regexp.last_match(1) == want_type
        picked[Regexp.last_match(2).to_i] = stamp
      end
    end
    picked
  end
end
71
+
72
# Request for the "sessiongenerate" mode; #run returns the reply's
# "ljsession" value.
class SessionGenerate < Req
  def initialize(user)
    super(user, 'sessiongenerate')
  end

  def run
    super
    session = self['ljsession']
    session
  end
end
81
+ end
82
+
83
+ # Journal export. A full export involves syncing both entries and comments.
84
+ # See <tt>sample/export</tt> for a full example.
85
+ module Sync
86
+
87
+ # To run a sync, create a Sync::Entries object, then call
88
+ # Entries#run_syncitems to fetch the sync metadata, then call
89
+ # Entries#run_sync to get the actual entries.
90
# To run a sync, create a Sync::Entries object, then call
# Entries#run_syncitems to fetch the sync metadata, then call
# Entries#run_sync to get the actual entries.
class Entries
  # The lastsync value this object was created with.
  attr_reader :lastsync

  # To resume from a previous sync, pass in its lastsync value.
  def initialize(user, lastsync=nil)
    @user = user
    @logitems = {}
    @lastsync = lastsync
  end

  # Issue SyncItems requests until the reported total has been fetched, then
  # keep the 'L' items for #run_sync.  Yields the running count and the total
  # after every request when a block is given.
  #
  # Returns true when there is at least one 'L' item to fetch.
  def run_syncitems # :yields: cur, total
    cur = 0
    total = nil
    items = {}
    lastsync = @lastsync
    while total.nil? || cur < total
      req = Request::SyncItems.new(@user, items, lastsync)
      lastsync = req.run
      cur += req.fetched
      total ||= req.total
      yield cur, total if block_given?
      # A request that added nothing new cannot move the count forward, so
      # repeating it would loop forever.
      break if req.fetched.zero?
    end
    @logitems = Request::SyncItems.subset_items(items, 'L')
    !@logitems.empty?
  end

  # Issue GetEvents requests until every item found by #run_syncitems has
  # been received.  After each request, yields the entries returned, the
  # greatest sync time seen so far and the number of items still missing.
  #
  # Stops early when a reply contains none of the missing items, since the
  # next request would be identical; the last yielded remaining count is
  # then non-zero.  Returns nil.
  def run_sync # :yields: entries_hash, lastsync, remaining_count
    return if @logitems.empty?

    lastsync = @lastsync
    until @logitems.empty?
      req = Request::GetEvents.new(@user, :lastsync => lastsync)
      entries = req.run
      missing_before = @logitems.size
      # pop off all items that we now have entries for
      entries.each do |itemid, _entry|
        time = @logitems.delete(itemid)
        next unless time # an entry that was not among the tracked items
        lastsync = time if lastsync.nil? || time > lastsync
      end
      yield entries, lastsync, @logitems.size if block_given?
      break if @logitems.size == missing_before
    end
  end
end
131
+
132
# Fetches a journal's comments through export_comments.bml.
#
# Call #run_metadata first: it records the maximum comment id that both
# loops use as their stopping point.  Then call #run_body.
class Comments
  def initialize(user)
    @user = user
    @session = nil
    @maxid = nil
  end

  # GET one batch of comment data (+mode+ is 'meta' or 'body') starting at
  # comment id +start+, and return the response body.  A session cookie is
  # generated on first use and reused afterwards.
  #
  # Raises RuntimeError when the server does not answer with status 200.
  def run_GET(mode, start)
    unless @session
      req = Request::SessionGenerate.new(@user)
      @session = req.run
    end

    path = "/export_comments.bml?get=comment_#{mode}&startid=#{start}"
    # authas: hooray for features discovered by reading source!
    path += "&authas=#{@user.usejournal}" if @user.usejournal
    url = URI.parse(@user.server.url + path)

    http = Net::HTTP.new(url.host, url.port)
    # Net::HTTP#get returned [response, body] on Ruby 1.8 and returns only
    # the response on later versions; take the response and read its body.
    resp, = http.get(url.request_uri, 'Cookie' => "ljsession=#{@session}")
    # XXX we'd like to stream this data to the XML parser.
    # is this possible?
    if resp.code != '200'
      raise RuntimeError,
            "comment export failed: HTTP #{resp.code} for #{url.request_uri}"
    end
    resp.body
  end
  private :run_GET

  # Fetch comment metadata in batches from comment id +start+.  After each
  # non-empty batch, yields the highest comment id in it, the maximum
  # comment id and the parser holding the batch.
  def run_metadata(start=0, &block) # :yields: cur, total, parsed
    fetch_batches('meta', start, true, &block)
  end

  # Fetch comment bodies in batches from comment id +start+; yields like
  # #run_metadata.
  #
  # Raises RuntimeError when #run_metadata has not been run yet, because the
  # maximum comment id is only ever recorded there.
  def run_body(start=0, &block) # :yields: cur, total, parsed
    if @maxid.nil?
      raise RuntimeError,
            "maximum comment id unknown; call run_metadata before run_body"
    end
    fetch_batches('body', start, false, &block)
  end

  # Shared fetch loop: request batches of +mode+ data until +start+ reaches
  # the maximum comment id or a batch comes back empty.  With +learn_maxid+
  # the maximum id is taken from the first parsed reply that provides one,
  # and the loop keeps going while it is still unknown.
  def fetch_batches(mode, start, learn_maxid)
    while (learn_maxid && @maxid.nil?) || start < @maxid
      data = run_GET(mode, start)
      parsed = LiveJournal::Sync::CommentsXML::Parser.new(data)
      @maxid ||= parsed.maxid if learn_maxid
      break if parsed.comments.empty?
      cur = parsed.comments.keys.max
      yield cur, @maxid, parsed
      start = cur + 1
    end
  end
  private :fetch_batches
end
182
+ end
183
+ end
184
+
185
+ # vim: ts=2 sw=2 et :
data/sample/export ADDED
@@ -0,0 +1,154 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'livejournal/sync'
4
+ require 'livejournal/database'
5
+ require 'optparse'
6
+
7
# Text progress bar: prints a caption and an empty "[    ]" frame, then
# fills the frame with dots as progress is reported.
class ProgressBar
  def initialize(caption)
    @cur = 0
    @width = 40
    print caption
    # Draw the empty frame, then backspace to just after the "[" so the
    # dots overwrite the blanks.
    print("[" + " "*@width + "]" + "\x08"*(@width + 1))
    $stdout.flush
  end

  # Advance the bar to column +pos+.  Positions past the end are clamped to
  # the bar width; positions at or before the current column, and calls made
  # after #finish, print nothing.
  def fill_to(pos)
    return if @cur < 0
    pos = @width if pos > @width
    return if pos <= @cur
    print "."*(pos - @cur)
    $stdout.flush
    @cur = pos
  end

  # Report that +cur+ out of +max+ units are done.  Ignored while +max+ is
  # nil or not positive, so "0 of 0" does not divide by zero.
  def update(cur, max)
    return unless max && max > 0
    fill_to(@width*cur/max)
  end

  # End the bar with a newline, filling it completely first unless +error+
  # is true.  Does nothing when the bar is already finished.
  def finish(error=false)
    return unless @cur >= 0
    fill_to(@width) unless error
    puts
    @cur = -1
  end

  # Create a bar, yield it, and finish it; when the block raises a
  # StandardError the bar is closed without being filled and the error is
  # re-raised.
  def self.with_progress(caption)
    bar = ProgressBar.new(caption)
    begin
      yield bar
    rescue
      bar.finish(true)
      raise
    else
      bar.finish(false)
    end
  end
end
46
+
47
# Command-line settings, filled in by the option handlers below.
username = nil
password = nil
usejournal = nil
dbfile = nil
get_entries = true
get_comments = true
# Block parameters are local to their block, so every handler copies its
# argument into the outer variable explicitly.
opts = OptionParser.new do |parser|
  parser.on('-u', '--user USERNAME',
            'Login username') { |value| username = value }
  parser.on('-p', '--password PASSWORD',
            'Login password') { |value| password = value }
  parser.on('-a', '--usejournal JOURNAL',
            'Journal to sync (USERNAME must have access)') { |value| usejournal = value }
  parser.on('-d', '--db FILENAME',
            'Filename for output database') { |value| dbfile = value }
  parser.on('--nocomments',
            "Don't fetch comments (only entries)") { get_comments = false }
  parser.on('--noentries',
            "Don't fetch entries (only comments)") { get_entries = false }
end
opts.parse!(ARGV)

unless username
  puts opts
  puts "ERROR: Must specify username."
  exit 1
end

unless dbfile
  puts opts
  puts "ERROR: Must specify database file."
  exit 1
end

unless password
  print "Enter password (WARNING: echoed to screen): "
  $stdout.flush
  # Read from the terminal rather than from files named in ARGV; to_s turns
  # an end-of-input nil into an empty password instead of crashing.
  password = $stdin.gets.to_s.strip
end
86
+
87
# Log in as the given user (optionally acting on another journal) and open
# the output database, labelled with the journal being synced.
user = LiveJournal::User.new(username, password)
user.usejournal = usejournal
db = LiveJournal::Database.new(dbfile, true)
db.username = user.usejournal || user.username

if get_entries
  puts "Fetching entries..."
  lastsync = db.lastsync
  puts "Resuming from #{lastsync}." if lastsync
  sync = LiveJournal::Sync::Entries.new(user, lastsync)

  ProgressBar.with_progress("Fetching metadata: ") do |bar|
    sync.run_syncitems { |cur, total| bar.update(cur, total) }
  end

  ProgressBar.with_progress("Fetching bodies: ") do |bar|
    stored = 0
    sync.run_sync do |entries, newest, remaining|
      # Store each batch in one transaction, then record how far we got.
      db.transaction do
        entries.each { |_itemid, entry| db.store_entry entry }
      end
      db.lastsync = newest

      stored += entries.length
      bar.update(stored, remaining + stored)
    end
  end
end
119
+
120
if get_comments
  puts "Fetching comments..."

  cs = LiveJournal::Sync::Comments.new(user)

  # Resume after the last comment whose metadata is already stored, or
  # start from 0 when there is none.
  last_meta = db.last_comment_meta
  next_meta = last_meta ? last_meta + 1 : 0

  ProgressBar.with_progress("Fetching metadata: ") do |bar|
    cs.run_metadata(next_meta) do |cur, max, data|
      db.store_comments_meta data.comments
      db.store_usermap data.usermap
      bar.update(cur, max)
    end
  end

  # Same resume rule for comment bodies.
  last_full = db.last_comment_full
  next_full = last_full ? last_full + 1 : 0

  ProgressBar.with_progress("Fetching bodies: ") do |bar|
    cs.run_body(next_full) do |cur, max, data|
      bar.update(cur, max)
      db.store_comments_full data.comments
    end
  end
end
153
+
154
+ # vim: ts=2 sw=2 et :