w3m-autopagerize 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,31 @@
1
#!/usr/local/bin/ruby19 -Ku
# w3m Local CGI front end for w3m-autopagerize.
#
# Asks the dRuby server at druby://:9322 either to crop the current page
# (QUERY_STRING == 'crop') or to produce the next page, then writes the reply
# to stdout as a Local CGI response.
require 'drb'
require 'w3m-localcgi'
require 'kconv'

DRb.start_service
public :print # HACK to work w3mctl

server = DRbObject.new_with_uri "druby://:9322"
# This object itself is handed to the server by reference (DRbUndumped).
request = [ENV['W3M_URL'], ENV['W3M_SOURCEFILE'], ENV['W3M_CHARSET'], self.extend(DRbUndumped)]
hash = if ENV['QUERY_STRING'] == 'crop'
         server.crop_this_page(*request)
       else
         server.nextpage(*request)
       end

if hash[:html]
  # The server produced a page: send it with the AutoPagerize hints as headers.
  headers = ["Content-Type: text/html"]
  headers << "W3m-AutoPagerize-NextLink: #{hash[:nextLink]}" if hash[:nextLink]
  headers << "W3m-AutoPagerize-PageElement: #{hash[:pageElement]}" if hash[:pageElement]
  puts headers
  puts
  puts hash[:html]
else
  # No HTML: if a location is known, tell w3m to go there instead.
  if hash[:location]
    puts "W3m-Control: BACK"
    puts "W3m-Control: GOTO #{hash[:location]}"
    puts "W3m-AutoPagerize-NextLink: #{hash[:nextLink]}" if hash[:nextLink]
  end
  puts
end
@@ -0,0 +1,489 @@
1
+ #!/usr/local/bin/ruby19
2
+ # -*- coding: utf-8 -*-
3
+ # (executable-interpret "ruby19 /m/home/rubikitch/w3m/cgi-bin/w3m-autopagerize/test-w3m-autopagerize.rb --no-use-color ")
4
+ # (executable-interpret "rm -f /log/w3m-autopagerize.log; w3m-autopagerize-server.rb -r")
5
+ start_time = Time.now
6
+ #Encoding.default_internal = "UTF-8"
7
+ require 'kconv'
8
+ require 'uri'
9
+ require 'rubygems'
10
+ require 'nokogiri'
11
+ require 'pp'
12
+ require 'logger'
13
+ require 'tmpdir'
14
+ require 'json'
15
+
16
+ ###########################################################################
17
+ # Configurable Variables #
18
+ ###########################################################################
19
# Extra command-line options inserted into the w3m command that fetches pages.
$W3M_EXTRA_OPTIONS = ""
# see http://www.opera-wiki.com/index.php?FAQ%2F5.%E3%82%AB%E3%82%B9%E3%82%BF%E3%83%9E%E3%82%A4%E3%82%BA#k7bb0c80
# Partial match: text containing one of these marks a "next" link/button.
$FALLBACK_PATTERNS = [
  "次へ", "次頁", "次ページ", "次項", "次の", "次を", "先へ", "つぎへ", "つぎの",
  "進む", "next", "もっと見る",
]
# Full match: text equal to one of these marks a "next" link/button.
$FALLBACK_WORDS = ["次", "つぎ", "続きます", "keep reading", "[→]", "次一覧", "Older Entries"]
# Prefix match: text starting with one of these marks a "next" link/button.
$FALLBACK_START_WORDS = [">", ">", "次", "つぎ", "Next", "NEXT", "next", "→"]
# Locations (URL or file name) of the SITEINFO JSON data to import.
$SITEINFO_IMPORT_URLS = [
  "http://wedata.net/databases/AutoPagerize/items.json",
]
# SITEINFO entries whose "url" equals one of these strings are not imported.
$EXCLUDE_URLS = [
  '^https?:\/\/.',
]
# Log destination handed to Logger.new at startup.
$LOG_FILE = $stderr

###########################################################################
#  DSL for nexturl                                                        #
###########################################################################
$TEST_MODE = false
# List of [url_or_pattern, SiteData] pairs, searched in order.
$SITEINFO = []
# The Local CGI object of the request being served; w3mctl prints to it.
$client = nil
39
# One SITEINFO entry: how to find the next page for a matching URL.
#
# Members:
#   nextLink     - XPath selecting the link/form that leads to the next page
#   insertBefore - stored as given; not read inside this class
#   exampleUrl   - stored as given; not read inside this class
#   pageElement  - XPath of the content to keep; not read inside this class
#   block        - optional callable evaluated by #next_url with instance_exec
#   match        - result of matching the URL against the entry's pattern
class SiteData < Struct.new(:nextLink, :insertBefore, :exampleUrl, :pageElement,
                            :block, :match)
  # Builds an XPath predicate that is true when +text+ equals one of +words+
  # or contains one of +patterns+. Empty lists contribute nothing.
  def self.fallback_predicate1(text, words=$FALLBACK_WORDS, patterns=$FALLBACK_PATTERNS)
    clauses = [
      words.map { |w| %Q!#{text}="#{w}"! }.join(' or '),
      patterns.map { |w| %Q!contains(#{text},"#{w}")! }.join(' or '),
    ]
    clauses.reject { |clause| clause == "" }.join(" or ")
  end

  # Builds an XPath predicate that is true when +text+ starts with one of
  # +start_words+.
  def self.fallback_predicate2(text, start_words=$FALLBACK_START_WORDS)
    start_words.map { |w| %Q!starts-with(#{text},"#{w}")! }.join(' or ')
  end

  # link to next
  #
  # Returns the (memoized) list of heuristic entries built from the
  # $FALLBACK_* variables. The list is extended with FallbackSetup.
  def self.fallbacks
    @fallbacks ||= begin
      list = [
        new("//a[#{fallback_predicate1('.')}]"),
        new("//form[descendant::input[#{fallback_predicate1('@value')}]]"),
      ]
      unless $FALLBACK_START_WORDS.to_a.empty?
        list.concat [
          new("//a[#{fallback_predicate2('.')}]"),
          new("//form[descendant::input[#{fallback_predicate2('@value')}]]"),
        ]
      end
      list.extend(FallbackSetup)
    end
  end

  module FallbackSetup
    # Appends every fallback to $SITEINFO under a pattern matching any URL.
    def setup!
      each { |fallback| $SITEINFO << [/./, fallback] }
    end
  end

  # Make the DSL pretty!
  # Each member reader becomes a combined accessor: called with a truthy
  # argument it stores the value, called without one it returns the value.
  members.each do |field|
    undef_method field
    define_method(field) do |value = nil|
      if value
        self[field] = value
      else
        self[field]
      end
    end
  end

  # Computes the URL of the page following +uri+.
  #
  # The block (if any) runs first because it may set nextLink on this entry.
  # With a nextLink XPath the first matching node's href/action/value is
  # merged into +uri+ (nil when nothing is found); without one the block's
  # result is merged instead.
  def next_url(uri)
    uri = URI(uri.to_s)
    result = instance_exec(uri, match, &block) if block
    xpath = nextLink
    unless xpath
      $logger.info "#{__method__}: result = #{result}"
      return uri.merge(result)
    end

    nokogiri = $nokogiri_cache[uri.to_s]
    $logger.info "#{__method__}: use xpath #{xpath}"
    nodes = nokogiri.xpath(xpath)
    node = nodes.first
    $logger.debug "#{__method__}: nodes.length = #{nodes.length}"
    # Explicit nil check instead of a blanket `rescue nil`.
    nexturl = node && (node["href"] || node["action"] || node["value"])
    # nexturl = nokogiri.xpath("#{xpath}/@href").first.content rescue nil
    $logger.info "#{__method__}: nexturl = #{nexturl or 'NOT FOUND'}"
    return nil unless nexturl

    # for some buggy sites not encoding spaces; gsub (not gsub!) leaves the
    # string obtained from the node untouched
    uri.merge nexturl.gsub(/ /, '+')
  end
end
113
+
114
# Registers a custom rule for +url_or_pattern+ through defnext_. The given
# block is wrapped so that its use is logged before it is evaluated with
# instance_exec, receiving the same two arguments the wrapper receives.
def defnext(url_or_pattern, nexturl=nil, &b)
  logged = proc do |u, m|
    $logger.info "Use defnext for #{url_or_pattern}"
    instance_exec(u, m, &b)
  end
  defnext_(url_or_pattern, nexturl, &logged)
end
120
+
121
# Appends a [url_or_pattern, SiteData] pair to $SITEINFO. When +nexturl+ is
# given the entry's block simply returns it; otherwise the supplied block is
# stored as is.
def defnext_(url_or_pattern, nexturl=nil, &block)
  entry = SiteData.new
  entry.block = nexturl ? lambda { |u, m| nexturl } : block
  $SITEINFO << [url_or_pattern, entry]
end
130
+
131
+
132
# Registers a rule whose next URL is the current URL with +string+ appended.
def addstring(url_or_pattern, string)
  defnext_(url_or_pattern) do |u, m|
    $logger.info "Use addstring for #{url_or_pattern}"
    current = u.to_s
    current + string
  end
end
138
+
139
# Registers a rule whose next URL is the current URL with the number captured
# by the pattern's first group increased by +n+. A captured number that starts
# with "0" keeps its width through zero padding.
def increment(url_or_pattern, n=1)
  defnext_(url_or_pattern) do |u, m|
    $logger.info "Use increment for #{url_or_pattern}"
    digits = m[1]
    bumped = digits.to_i + n
    replacement = if digits =~ /^0/
                    format("%0#{digits.length}d", bumped)
                  else
                    bumped.to_s
                  end
    bumped_url = u.to_s
    # Overwrite exactly the span the first group matched.
    bumped_url[m.begin(1)...m.end(1)] = replacement
    bumped_url
  end
end
152
+
153
# Prints one "W3m-Control:" header line per argument to $client. nil/false
# arguments are skipped; an argument equal to true prints two CRLF pairs
# instead of a header. Always returns nil.
def w3mctl(*strings)
  strings.each do |str|
    next unless str
    line = str == true ? "\r\n\r\n" : "W3m-Control: #{str}\r\n"
    $client.print line
  end
  nil
end
165
+
166
+ ###########################################################################
167
+ # File.zread #
168
+ ###########################################################################
169
+ require 'zlib'
170
+
171
# The two magic bytes (0x1f 0x8b) that start every gzip stream, as a binary
# string (pack produces one on every Ruby version, so no version test is
# needed).
Zlib::GZIP_MAGIC = [0x1f, 0x8b].pack("C*")

# Reads +file+ and returns its contents, transparently decompressing it when
# it starts with the gzip magic bytes.
#
# The file is opened with File.open in binary mode: the name is never
# interpreted as a "|command" (as Kernel#open would do) and the bytes are not
# subject to text-mode conversion.
def File.zread(file)
  File.open(file, "rb") do |f|
    magic = f.read(2)
    f.rewind
    if magic == Zlib::GZIP_MAGIC
      Zlib::GzipReader.wrap(f) { |gz| gz.read }
    else
      f.read
    end
  end
end
187
+
188
+ ###########################################################################
189
+ # content cache #
190
+ ###########################################################################
191
TMPFILE = Dir.tmpdir + "/w3m-autopagerize.tmp.html"

# url => [source, charset]. On a miss the page is fetched with
# get_header_and_content and the charset is taken from the header's
# "charset=" parameter, falling back to Kconv.guess on the body.
$content_cache = Hash.new do |h,url|
  $logger.debug "cache miss: set $content_cache[#{url.inspect}]"
  # use w3m to pass cookie
  header, source = get_header_and_content(url)
  $logger.debug "cache miss: source is html? = #{source =~ /<body/i and true}"
  # Capture only the charset token: stop at whitespace (including a trailing
  # CR), ';' or a quote so the name is usable as an encoding name.
  declared = header[/charset=["']?([^\s;"']+)/i, 1]
  charset = normalize_charset(declared || Kconv.guess(source))
  source.force_encoding("ASCII-8BIT")
  h[url] = [source, charset]
end
201
+
202
# BUG: libxml2 cannot handle id() function without doctype.
# http://labs.gmo.jp/blog/ku/2008/07/libxmlhtmlxpathid.html
# A doctype is therefore prepended to every source before it is parsed.
DOCTYPE = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'

# url => parsed document. On a miss the source and charset are taken from
# $content_cache and parsed with Nokogiri::HTML.
$nokogiri_cache = Hash.new do |cache, url|
  $logger.debug "cache miss: set $nokogiri_cache[#{url.inspect}]"
  html, encoding = $content_cache[url]
  cache[url] = Nokogiri::HTML(DOCTYPE + html, nil, encoding)
end
210
+
211
+
212
+ ###########################################################################
213
+ # Utilities #
214
+ ###########################################################################
215
require 'shellwords'
require 'stringio'

# Returns +data+ decompressed when it starts with the gzip magic bytes
# (0x1f 0x8b), otherwise +data+ itself.
def gunzip_if_gzipped(data)
  return data unless data.getbyte(0) == 0x1f && data.getbyte(1) == 0x8b
  gz = Zlib::GzipReader.new(StringIO.new(data))
  begin
    gz.read.force_encoding("ASCII-8BIT")
  ensure
    gz.close
  end
end

# Fetches +url+ with "w3m -dump_both" and returns [header, source], where
# source is the (gunzipped) body as a binary string.
#
# w3m is started without a shell, so characters in +url+ (which may come from
# a link on a fetched page) cannot be interpreted as shell syntax. The body is
# decompressed in memory, so concurrent calls do not share a temporary file.
def get_header_and_content(url)
  command = ["w3m"]
  # $W3M_EXTRA_OPTIONS is a single string; split it the way a shell would.
  command.concat Shellwords.split($W3M_EXTRA_OPTIONS.to_s)
  command.push "-dump_both", "-o", "accept_encoding=gzip", url.to_s
  output = IO.popen(command, "rb") { |io| io.read }
  # The header ends at the first blank line.
  header, source = output.to_s.force_encoding("ASCII-8BIT").split(/\n\n/, 2)
  source ||= String.new
  [header, gunzip_if_gzipped(source.force_encoding("ASCII-8BIT"))]
end
224
+
225
# Returns the content behind +url+ as a binary string: http(s) URLs are
# fetched with get_header_and_content (body only); anything else is treated
# as a local file path and read with File.zread.
def get_content(url)
  return get_header_and_content(url)[1] if url =~ /^https?:/

  # local file
  local_path = File.expand_path(url)
  File.zread(local_path).force_encoding "ASCII-8BIT"
end
232
+
233
# Converts +charset+ (a String, an Encoding or nil) into a charset name.
#
# Surrounding whitespace is removed, and the common spellings of Shift_JIS
# are mapped to "cp932". Every other name is returned unchanged apart from
# the stripping.
def normalize_charset(charset)
  name = charset.to_s.strip
  # FIXME I do not know other charsets than Japanese.
  shift_jis_spellings = %w[shift_jis shift-jis sjis x-sjis]
  shift_jis_spellings.include?(name.downcase) ? "cp932" : name
end
238
+
239
+
240
# Forgets every registered entry by pointing $SITEINFO at a new empty array.
def reinit
  $SITEINFO = Array.new
end
243
+
244
+ # unless "".respond_to? :force_encoding # for ruby 1.8
245
+ # class String
246
+ # def force_encoding(args); self end
247
+ # def encoding; Kconv.guess(self) end
248
+ # end
249
+ # end
250
+
251
+ ###########################################################################
252
+ # Entry Points #
253
+ ###########################################################################
254
+ class Server
255
# Returns the data object of the first $SITEINFO entry that applies to +url+
# and stores the match result on it.
#
# A Regexp key is matched against the URL (match becomes the MatchData); any
# other key is compared to the URL as a string (match becomes true).
#
# Raises RuntimeError when no entry applies, instead of failing with a
# NoMethodError on nil.
def sitedata(url)
  url = url.to_s
  match = nil
  entry = $SITEINFO.find do |pattern, _data|
    match = pattern.is_a?(Regexp) ? url.match(pattern) : url == pattern.to_s
  end
  raise "no SITEINFO entry matches #{url}" unless entry

  data = entry[1]
  data.match = match
  data
end
private :sitedata
270
+
271
HTML_OUTPUT_FILE = "/tmp/w3m-autopagerize-tmp.html"

# Builds the HTML shown to w3m for +location+.
#
# The part selected by sitedata.pageElement is extracted from the parsed
# document; when that fails (the XPath raises or selects nothing) the whole
# <body> is used and an explanatory message is placed above it. Returns a hash
# with :html, :location, :pageElement and :nextLink.
def crop_html(location, prev_url, sitedata)
  $logger.debug "#{__method__}: url = #{location}"
  location = location.to_s
  doc = $nokogiri_cache[location]
  title = begin
            doc.at("//title").to_html
          rescue
            "<title></title>"
          end

  errmsg = ""
  begin
    $logger.info "#{__method__}: use xpath #{sitedata.pageElement}"
    nodes = doc.xpath(sitedata.pageElement)
    $logger.debug "#{__method__}: nodes.length = #{nodes.length}"
    html_piece = nodes.to_html
    raise if html_piece.strip.empty?
  rescue
    $logger.error "#{__method__}: failed to crop!"
    html_piece = doc.at("body").to_html
    errmsg = %{<p>w3m-autopagerize failed to crop html but next url is found.<br />
xpath = #{sitedata.pageElement || 'pageElement not found'}
</p>
<hr>}
  end

  # BUG: Nokogiri emits superfluous &#13;.
  html_piece.gsub!(/&#13;/, '') # hack
  # BUG: w3m cannot handle <script />, so replace it with <script></script>
  html_piece.gsub!(%r!(<script.+?)/>!, '\1></script>') # hack

  html = [
    %{<html>},
    %{<head><base href="#{location}" />#{title}},
    %{<link rel="w3m-autopagerize-orig" href="#{location}" />},
    %{<link rel="w3m-autopagerize-prev" href="#{prev_url}" />},
    %{</head>},
    %{<body>},
    %{Original URL: <a href="#{location}">#{location}</a><br>},
    errmsg,
    html_piece,
    %{</body></html> },
  ].join("\n")

  { :html => html, :location => location,
    :pageElement => sitedata.pageElement, :nextLink => sitedata.nextLink }
end
private :crop_html
315
+
316
# Warms the caches for the page that follows +location+ in a background
# thread, then trims both caches.
#
# When sitedata.next_url finds no following page nothing is fetched
# (previously nil became "" and an entry for the empty URL was fetched and
# cached).
def prefetch_next_location(location, sitedata)
  Thread.start do
    # sleep 1
    $logger.debug "#{__method__}: #{location}"
    # sitedata = sitedata location
    newloc = sitedata.next_url(location)
    $logger.debug "#{__method__}: new location: #{newloc}"
    $nokogiri_cache[newloc.to_s] if newloc
  end
  # It uses Ordered Hash in Ruby 1.9: the first key is the oldest one, and one
  # entry is dropped per call once a cache holds more than three.
  [$nokogiri_cache, $content_cache].each do |hash|
    hash.delete hash.first[0] if hash.length > 3
  end
end
private :prefetch_next_location
331
+
332
# Common entry work for a request: logs it, remembers +client+ in $client and
# loads the page source from +srcfile+ (if given).
#
# When the request comes from a Local CGI page (a file: URL under cgi-bin)
# the real URL is recovered from the <base href> of that page; otherwise the
# source is stored in $content_cache under +url+. Returns [src, url].
def prepare(url, srcfile, charset, client, method)
  $logger.info "=================================================="
  $logger.info "#{method}: entered url=#{url} charset=#{charset}"
  $logger.debug "#{method}: W3M_SOURCEFILE = #{srcfile}" if srcfile
  $client = client

  src = srcfile ? File.zread(srcfile).force_encoding("ASCII-8BIT") : nil
  from_local_cgi = url =~ /^file:.*\/cgi-bin\//
  if from_local_cgi # from Local CGI
    url = src.force_encoding("ASCII-8BIT")[%r!<base href=['"](.+?)['"]!, 1] # '"
    $logger.info "#{method}: base url=#{url}"
  else # from W3M_SOURCEFILE
    $logger.debug "#{method}: set $content_cache[#{url.inspect}] from W3M_SOURCEFILE"
    $logger.debug "#{method}: source is html? = #{src =~ /<body/i and true}"
    if src
      $content_cache[url] = [src.force_encoding("ASCII-8BIT"), normalize_charset(charset)]
    end
  end
  [ src, url ]
end
349
+
350
# Crops the page currently shown (no previous URL) and returns the hash built
# by crop_html. Whether or not cropping succeeds, a prefetch is started for
# the location that follows this page.
def crop_this_page(url, srcfile, charset, client)
  _src, url = prepare(url, srcfile, charset, client, __method__)

  site = sitedata(url)
  begin
    crop_html(url, nil, site)
  ensure
    prefetch_next_location(site.next_url(url), site)
  end
end
360
+
361
+
362
# Finds the page following +url+ and returns the hash built by crop_html for
# it. Any failure is turned into {:html => <error report>}.
#
# The entry for the URL is tried first; when it yields no location the
# heuristic fallbacks are tried in order. A prefetch of the page after the
# next one is started in either case.
def nextpage(url, srcfile, charset, client)
  src = site = nil
  src, url = prepare(url, srcfile, charset, client, __method__)
  site = sitedata(url)
  location = site.next_url(url)

  if location
    if site.pageElement
      $logger.debug "#{__method__}: location and pageElement found."
    else
      $logger.debug "#{__method__}: location found."
    end
    begin
      crop_html location, url, site
    ensure
      prefetch_next_location location, site
    end
  else
    # Explicit search: a `for` loop that never breaks evaluates to the list
    # itself, which would be mistaken for a found URL.
    fallback = fallback_nexturl = nil
    SiteData.fallbacks.each do |candidate|
      found = candidate.next_url(url)
      next unless found
      fallback = candidate
      fallback_nexturl = found
      break
    end
    unless fallback_nexturl
      $logger.debug "#{__method__}: no location."
      raise "no location!"
    end

    $logger.info "#{__method__}: fallback"
    begin
      crop_html fallback_nexturl, url, fallback
    ensure
      prefetch_next_location fallback_nexturl, fallback
    end
  end
rescue => e
  # Building the report must not raise itself: the entry may be unknown and
  # the source may be unavailable.
  xpath = (site && site.nextLink) || 'nextLink not found'
  src_encoding = begin
                   Kconv.guess(src || $content_cache[url].first)
                 rescue StandardError
                   'unknown'
                 end
  html = %{<pre>Error!
xpath = #{xpath}
#{e}
#{e.backtrace.pretty_inspect}
src_encoding=#{src_encoding}
</pre>
}
  $logger.error "#{__method__}: error!"
  $logger.error "#{__method__}: #{e}"
  $logger.error "#{__method__}: #{e.backtrace.pretty_inspect}"
  {:html => html}
end
407
+
408
+
409
+ # (executable-interpret "ruby19 -r w3m-autopagerize-server -e '$logger=Logger.new(); load_siteinfo'")
410
+ # (executable-interpret "ruby18 -r w3m-autopagerize-server -e '$logger=Logger.new(); load_siteinfo'")
411
# Imports the SITEINFO JSON data from every location in
# $SITEINFO_IMPORT_URLS and appends a [Regexp, SiteData] pair to $SITEINFO
# for each usable item.
#
# Skipped items: those without "data" or "url", those listed in
# $EXCLUDE_URLS, and those whose url is not a valid Ruby regexp (logged as a
# warning, so a single bad item no longer aborts the import).
def load_siteinfo
  # JSON decoding turns "\/" into "/", so an exclusion written with escaped
  # slashes is also compared in its unescaped form.
  # NOTE(review): assumes escaped-slash exclusions are meant to match the
  # decoded url - confirm against the live SITEINFO data.
  excluded = $EXCLUDE_URLS.map { |u| [u, u.gsub('\/', '/')] }.flatten

  $SITEINFO_IMPORT_URLS.each do |siteinfo_url|
    JSON.parse(get_content(siteinfo_url).toutf8).each do |hash|
      data = hash["data"]
      next unless data
      url = data["url"]
      next if !url || excluded.include?(url)

      begin
        pattern = Regexp.new(url)
      rescue RegexpError, TypeError => e
        $logger.warn "#{__method__}: skipped invalid url pattern #{url.inspect}: #{e.message}"
        next
      end
      sd = SiteData.new data["nextLink"], data["insertBefore"],
                        data["exampleUrl"], data["pageElement"]
      $SITEINFO << [pattern, sd]
    end
  end
  $logger.info "#{__method__}: loaded"
end
425
+
426
# Replaces the running process with a fresh run of the program named by $0.
def restart
  Kernel.exec($0)
end
429
+
430
# Loads the user's config file and reports the outcome on $stderr.
#
# config_file - a path (relative paths are resolved against this file's
#               directory) or :ignore to skip loading.
#
# Errors raised while loading (including syntax and load errors) are reported
# together with their message and do not abort startup. Signals and `exit`
# are no longer swallowed.
def load_config_file(config_file)
  if config_file == :ignore
    $stderr.puts "load_config_file: config file is ignored!"
    return
  end

  config_file = File.expand_path(config_file, File.dirname(__FILE__))
  if File.file? config_file
    load(config_file)
    $stderr.puts "load_config_file: loaded #{config_file}"
  else
    $stderr.puts "load_config_file: config file #{config_file} not found!"
  end
rescue StandardError, ScriptError => e
  $stderr.puts "load_config_file: error loading #{config_file}! (#{e.class}: #{e.message})"
end
445
+ end
446
+
447
+
448
# Server entry point: parses the command line, loads the config file and the
# SITEINFO data, then serves a Server object over dRuby at druby://:9322.
if __FILE__==$0
  require 'optparse'
  require 'drb'
  conf = Struct.new(:log_file, :siteinfo_url, :config_file).new
  conf.config_file = File.expand_path "~/.w3m-autopagerize.rb"
  ARGV.options {|o|
    o.on("-l", "--log LOGFILE",
         "Use log file.") {|x| conf.log_file = File.expand_path(x, File.dirname(__FILE__)) }
    o.on("-s", "--siteinfo URL",
         "URL of JSON data (SITEINFO).") {|x| conf.siteinfo_url = x}
    o.on("-c", "--config CONFIG", "Use config file.") {|x| conf.config_file = x }
    o.on("-f", "Ignore config file.") {|x| conf.config_file = :ignore }
    o.on("-r", "--restart", "--reload",
         "Restart the server.") {|x|
      # Best effort: any error from the remote call is deliberately ignored.
      begin
        DRbObject.new_with_uri(%q!druby://:9322!).restart
      rescue StandardError
        nil
      end
      puts "w3m-autopagerize-server restarted."
      exit
    }
    o.parse!
  }

  srv = Server.new
  srv.load_config_file(conf.config_file)
  # A log file name set by the config file is resolved against this file's
  # directory, exactly like the -l option, instead of the current directory.
  log_target = conf.log_file || $LOG_FILE
  log_target = File.expand_path(log_target, File.dirname(__FILE__)) if log_target.is_a?(String)
  $LOG_FILE = log_target
  $stderr.puts "startup: log file = #{$LOG_FILE.inspect}"
  $logger = Logger.new($LOG_FILE)
  $SITEINFO_IMPORT_URLS = [ conf.siteinfo_url ] if conf.siteinfo_url
  $logger.info "$SITEINFO_IMPORT_URLS = #{$SITEINFO_IMPORT_URLS.inspect}"

  srv.load_siteinfo
  GC.start
  # Registered last so the heuristic fallbacks are tried after every other entry.
  SiteData.fallbacks.setup!

  $stderr.puts "start w3m-autopagerize-server.rb (#{Time.now-start_time} secs)"

  # Run the garbage collector every five minutes.
  Thread.start do
    loop { sleep 300; GC.start }
  end

  # NOTE(review): the URI has no host part - confirm which interfaces this
  # binds to, since the served object exposes restart.
  DRb.start_service("druby://:9322", srv)
  DRb.thread.join
end
@@ -0,0 +1,130 @@
1
+ # -*- coding: utf-8 -*-
2
+ # This file shows default setting. If you customize w3m-autopagerize,
3
+ # copy this file to ~/.w3m-autopagerize.rb.
4
+
5
+ # Extra options of w3m to fetch web page.
6
+ $W3M_EXTRA_OPTIONS = ""
7
+
8
+ # SITEINFO location. Set URL or filename of SITEINFO JSON data.
9
+ $SITEINFO_IMPORT_URLS = %w[
10
+ http://wedata.net/databases/AutoPagerize/items.json
11
+ ]
12
+ # Disable SITEINFO entries. The default is to ignore `"url": "^https?:\/\/."' entry.
13
+ $EXCLUDE_URLS = %w[
14
+ ^https?:\/\/.
15
+ ]
16
+
17
+ # Log file location
18
+ # =================
19
+ #
20
+ # The default destination of the log is stderr.
21
+ $LOG_FILE = $stderr
22
+ # If you use a log file, uncomment this. Note that the default
23
+ # directory of log file is the directory of w3m-autopagerize-server.rb.
24
+
25
+ # $LOG_FILE = "w3m-autopagerize.log"
26
+
27
+ # Fallback patterns
28
+ # =================
29
+ #
30
+ # If w3m-autopagerize cannot find the next location (i.e. the SITEINFO
31
+ # entry is wrong or missing), it uses a heuristic method to find the next
32
+ # location with the $FALLBACK_* variables. It is like Opera's FastForward.
33
+ #
34
+ # Links/buttons whose text is "next" or "keep reading" (full match) are
35
+ # considered as next location.
36
+ $FALLBACK_WORDS = %w[次 つぎ 続きます keep\ reading [→] 次一覧 Older\ Entries next Next NEXT]
37
+ # Links/buttons whose text starts with ">" (prefix match) are
38
+ # considered as next location.
39
+ $FALLBACK_START_WORDS = %w[> >]
40
+ # Links/buttons whose text contains one of these patterns (partial match) are considered
41
+ # as next location.
42
+ $FALLBACK_PATTERNS = %w[次へ 次頁 次ページ 次項 次の 次を 先へ つぎへ つぎの 進む もっと見る ]
43
+
44
+ # Custom Location
45
+ # ===============
46
+ #
47
+ # You can define `next' pages with URL rules, using the `addstring' and
48
+ # `increment' functions. This is a handy way to specify the next location.
49
+ # It requires NO XPATH KNOWLEDGE, but some Regexp knowledge:-)
50
+ #
51
+ # Custom locations take precedence over SITEINFO: even if SITEINFO
52
+ # defines the configuration of a site, the custom location is used
53
+ # instead.
54
+ #
55
+ # For example, The next page of "http://www.dotup.org/" is
56
+ # "http://www.dotup.org/2.html". Use simply `addstring' function.
57
+ #
58
+ # The next page of "http://www.dotup.org/2.html" is
59
+ # "http://www.dotup.org/3.html". Use `increment' function with Regexp.
60
+ # The digits captured by the first group "(\d+)" are replaced with the next number.
61
+ # Note that writing a URL Regexp by %r!URL Regexp! is handy.
62
+ addstring "http://www.dotup.org/", "2.html"
63
+ increment %r!http://www.dotup.org/(\d+).html$!
64
+ #
65
+ # `increment' can add any integer. For example,
66
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja&start=20"
67
+ # to
68
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja&start=40".
69
+ increment %r!^http://images.google.(?:co.jp|com)/.*start=(\d+)!, 20
70
+ #
71
+ # `addstring' function can accept Regexp. For example,
72
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja"
73
+ # to
74
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja&start=20".
75
+ #
76
+ # Note that `increment' of google image search must be defined BEFORE
77
+ # `addstring'. If `addstring' is before `increment', w3m-autopagerize
78
+ # considers the next page of
79
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja&start=20"
80
+ # as
81
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja&start=20&start=20".
82
+ # It is because the URL matches both
83
+ # %r!^http://images.google.(?:co.jp|com)/! and
84
+ # %r!^http://images.google.(?:co.jp|com)/.*start=(\d+)!.
85
+ addstring %r!^http://images.google.(?:co.jp|com)/!, '&start=20'
86
+
87
+ # Custom Action
88
+ # =============
89
+ #
90
+ # You can execute any w3m commands for certain URLs. For example, to
91
+ # log in to Hatena and then open my (rubikitch's) Hatena diary, I execute
92
+ # "GOTO http://d.hatena.ne.jp/rubikitch/" followed by "DELETE_PREVBUF"
93
+ # three times. Use `defnext' and `w3mctl' for this.
94
+ #
95
+ # This is a good example of a login-and-goto action. Note that when you
96
+ # use login and goto, you must put your login/password in the
97
+ # ~/.w3m/pre_form file.
98
+ defnext "https://www.hatena.ne.jp/login" do
99
+ w3mctl "GOTO http://d.hatena.ne.jp/rubikitch/", "DELETE_PREVBUF", "DELETE_PREVBUF", "DELETE_PREVBUF"
100
+ end
101
+
102
+ # Custom SITEINFO
103
+ # ===============
104
+ #
105
+ # If you have your original SITEINFO for AutoPagerize, you can simply
106
+ # add the URL or filename into the top of $SITEINFO_IMPORT_URLS.
107
+ #
108
+ # The SITEINFO can be defined in Ruby DSL.
109
+ #
110
+ # In JSON:
111
+ #
112
+ # {
113
+ # "name": "(.~) what a quiet stiff (~.)",
114
+ # "data": {
115
+ # "insertBefore": "",
116
+ # "pageElement": "id(\"pixflow\")",
117
+ # "url": "^http:\/\/whytheluckystiff\\.net\/quiet\/",
118
+ # "nextLink": "id(\"header\")\/a[last()]",
119
+ # "exampleUrl": "http:\/\/whytheluckystiff.net\/quiet\/"
120
+ # }
121
+ # }
122
+ #
123
+ # In Ruby:
124
+ #
125
+ # defnext %r!^http://whytheluckystiff\.net/quiet/! do
126
+ # insertBefore ''
127
+ # pageElement 'id("pixflow")'
128
+ # nextLink 'id("header")/a[last()]'
129
+ # exampleUrl 'http://whytheluckystiff.net/quiet/'
130
+ # end
@@ -0,0 +1,252 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml"
4
+ lang="en" xml:lang="en">
5
+ <head>
6
+ <title>AutoPagerize for w3m</title>
7
+ <meta http-equiv="Content-Type" content="text/html;charset=euc-jp"/>
8
+ <meta name="generator" content="Org-mode"/>
9
+ <meta name="generated" content="2009-01-30"/>
10
+ <meta name="author" content="rubikitch"/>
11
+ <style type="text/css">
12
+ <!--/*--><![CDATA[/*><!--*/
13
+ html { font-family: Times, serif; font-size: 12pt; }
14
+ .title { text-align: center; }
15
+ .todo { color: red; }
16
+ .done { color: green; }
17
+ .tag { background-color:lightblue; font-weight:normal }
18
+ .target { }
19
+ .timestamp { color: grey }
20
+ .timestamp-kwd { color: CadetBlue }
21
+ p.verse { margin-left: 3% }
22
+ pre {
23
+ border: 1pt solid #AEBDCC;
24
+ background-color: #F3F5F7;
25
+ padding: 5pt;
26
+ font-family: courier, monospace;
27
+ font-size: 90%;
28
+ overflow:auto;
29
+ }
30
+ table { border-collapse: collapse; }
31
+ td, th { vertical-align: top; }
32
+ dt { font-weight: bold; }
33
+ div.figure { padding: 0.5em; }
34
+ div.figure p { text-align: center; }
35
+ .linenr { font-size:smaller }
36
+ .code-highlighted {background-color:#ffff00;}
37
+ .org-info-js_info-navigation { border-style:none; }
38
+ #org-info-js_console-label { font-size:10px; font-weight:bold;
39
+ white-space:nowrap; }
40
+ .org-info-js_search-highlight {background-color:#ffff00; color:#000000;
41
+ font-weight:bold; }
42
+ /*]]>*/-->
43
+ </style>
44
+ <script type="text/javascript">
45
+ <!--/*--><![CDATA[/*><!--*/
46
+ function CodeHighlightOn(elem, id)
47
+ {
48
+ var target = document.getElementById(id);
49
+ if(null != target) {
50
+ elem.cacheClassElem = elem.className;
51
+ elem.cacheClassTarget = target.className;
52
+ target.className = "code-highlighted";
53
+ elem.className = "code-highlighted";
54
+ }
55
+ }
56
+ function CodeHighlightOff(elem, id)
57
+ {
58
+ var target = document.getElementById(id);
59
+ if(elem.cacheClassElem)
60
+ elem.className = elem.cacheClassElem;
61
+ if(elem.cacheClassTarget)
62
+ target.className = elem.cacheClassTarget;
63
+ }
64
+ /*]]>*/-->
65
+ </script>
66
+ </head><body>
67
+ <h1 class="title">AutoPagerize for w3m</h1>
68
+
69
+ <p>AutoPagerize for w3m <a href="http://rubikitchrb.rubyforge.org/">http://rubikitchrb.rubyforge.org/</a>
70
+ </p>
71
+ <p>
72
+ Copyright (c) 2009 rubikitch &lt;rubikitch@ruby-lang.org&gt; <a href="http://www.rubyist.net/~rubikitch/">http://www.rubyist.net/~rubikitch/</a>
73
+ </p>
74
+ <p>
75
+ Use and distribution subject to the terms of the Ruby license.
76
+ </p>
77
+ <div id="table-of-contents">
78
+ <h2>Table of Contents</h2>
79
+ <div id="text-table-of-contents">
80
+ <ul>
81
+ <li><a href="#sec-1">1 Overview </a></li>
82
+ <li><a href="#sec-2">2 Programs </a>
83
+ <ul>
84
+ <li><a href="#sec-2.1">2.1 w3m-autopagerize-server.rb </a></li>
85
+ <li><a href="#sec-2.2">2.2 next.cgi </a></li>
86
+ <li><a href="#sec-2.3">2.3 config.sample.rb </a></li>
87
+ </ul>
88
+ </li>
89
+ <li><a href="#sec-3">3 Installation </a>
90
+ <ul>
91
+ <li><a href="#sec-3.1">3.1 Install AutoPagerize for w3m </a></li>
92
+ <li><a href="#sec-3.2">3.2 Copy config file </a></li>
93
+ <li><a href="#sec-3.3">3.3 Local CGI setup </a></li>
94
+ <li><a href="#sec-3.4">3.4 Key bind </a></li>
95
+ </ul>
96
+ </li>
97
+ <li><a href="#sec-4">4 Usage </a></li>
98
+ <li><a href="#sec-5">5 License </a></li>
99
+ </ul>
100
+ </div>
101
+ </div>
102
+
103
+ <div id="outline-container-1" class="outline-2">
104
+ <h2 id="sec-1">1 Overview </h2>
105
+ <div id="text-1">
106
+
107
+ <p>AutoPagerize for w3m finds next link and extracts page contents. It
108
+ consists of dRuby server program (w3m-autopagerize-server.rb) and w3m
109
+ Local CGI program (next.cgi).
110
+ </p>
111
+ </div>
112
+
113
+ </div>
114
+
115
+ <div id="outline-container-2" class="outline-2">
116
+ <h2 id="sec-2">2 Programs </h2>
117
+ <div id="text-2">
118
+
119
+
120
+ </div>
121
+
122
+ <div id="outline-container-2.1" class="outline-3">
123
+ <h3 id="sec-2.1">2.1 w3m-autopagerize-server.rb </h3>
124
+ <div id="text-2.1">
125
+
126
+ <p>AutoPagerize for w3m uses dRuby server w3m-autopagerize-server.rb
127
+ because initializing site data is time-consuming. Before using
128
+ AutoPagerize for w3m, you have to invoke w3m-autopagerize-server.rb!
129
+ w3m-autopagerize-server.rb loads config file (~/.w3m-autopagerize.rb)
130
+ if any and reads AutoPagerize SITEINFO data from wedata.net by
131
+ default.
132
+ </p>
133
+ </div>
134
+
135
+ </div>
136
+
137
+ <div id="outline-container-2.2" class="outline-3">
138
+ <h3 id="sec-2.2">2.2 next.cgi </h3>
139
+ <div id="text-2.2">
140
+
141
+ <p>next.cgi is the Local CGI program that asks w3m-autopagerize-server.rb for the next page.
142
+ </p>
143
+ </div>
144
+
145
+ </div>
146
+
147
+ <div id="outline-container-2.3" class="outline-3">
148
+ <h3 id="sec-2.3">2.3 config.sample.rb </h3>
149
+ <div id="text-2.3">
150
+
151
+ <p>The sample config file to customize. See <a href="config.sample.rb">file:config.sample.rb</a> for detail.
152
+ </p>
153
+ </div>
154
+ </div>
155
+
156
+ </div>
157
+
158
+ <div id="outline-container-3" class="outline-2">
159
+ <h2 id="sec-3">3 Installation </h2>
160
+ <div id="text-3">
161
+
162
+
163
+ </div>
164
+
165
+ <div id="outline-container-3.1" class="outline-3">
166
+ <h3 id="sec-3.1">3.1 Install AutoPagerize for w3m </h3>
167
+ <div id="text-3.1">
168
+
169
+ <p>AutoPagerize for w3m works with Ruby 1.9 only! So, you have to install
170
+ Ruby 1.9.x. Then simply issue:
171
+ </p>
172
+ <pre class="example">
173
+ $ sudo ruby1.9 -S gem install w3m-autopagerize
174
+ </pre>
175
+
176
+ </div>
177
+
178
+ </div>
179
+
180
+ <div id="outline-container-3.2" class="outline-3">
181
+ <h3 id="sec-3.2">3.2 Copy config file </h3>
182
+ <div id="text-3.2">
183
+
184
+ <p>If you customize AutoPagerize for w3m, copy config.sample.rb to
185
+ ~/.w3m-autopagerize.rb and edit it.
186
+ </p>
187
+ </div>
188
+
189
+ </div>
190
+
191
+ <div id="outline-container-3.3" class="outline-3">
192
+ <h3 id="sec-3.3">3.3 Local CGI setup </h3>
193
+ <div id="text-3.3">
194
+
195
+ <p>Local CGI program next.cgi is installed at
196
+ /usr/local/bin/next.cgi. You have to make w3m find it. Add
197
+ /usr/local/bin to your Local CGI path (cgi<sub>bin</sub>), or make symlink.
198
+ </p>
199
+ <pre class="example">
200
+ $ cd ~/w3m/cgi-bin; ln -s /usr/local/bin/next.cgi
201
+ </pre>
202
+
203
+ </div>
204
+
205
+ </div>
206
+
207
+ <div id="outline-container-3.4" class="outline-3">
208
+ <h3 id="sec-3.4">3.4 Key bind </h3>
209
+ <div id="text-3.4">
210
+
211
+ <p>Bind AutoPagerize for w3m to your favorite key. Edit ~/.w3m/keymap and add this line.
212
+ </p>
213
+ <pre class="example">
214
+ keymap x GOTO file:/cgi-bin/next.cgi
215
+ </pre>
216
+
217
+ </div>
218
+ </div>
219
+
220
+ </div>
221
+
222
+ <div id="outline-container-4" class="outline-2">
223
+ <h2 id="sec-4">4 Usage </h2>
224
+ <div id="text-4">
225
+
226
+ <ul>
227
+ <li>
228
+ Press `x' key to go to next page.
229
+ </li>
230
+ <li>
231
+ Press `=' key to see information, eg. XPath to get next page.
232
+
233
+ </li>
234
+ </ul>
235
+ </div>
236
+
237
+ </div>
238
+
239
+ <div id="outline-container-5" class="outline-2">
240
+ <h2 id="sec-5">5 License </h2>
241
+ <div id="text-5">
242
+
243
+ <p>AutoPagerize for w3m is licensed under the same terms as Ruby.
244
+ </p></div>
245
+ </div>
246
+ <div id="postamble"><p class="author"> Author: rubikitch
247
+ <a href="mailto:rubikitch@ruby-lang.org">&lt;rubikitch@ruby-lang.org&gt;</a>
248
+ </p>
249
+ <p class="date"> Date: 2009-01-30</p>
250
+ <p>HTML generated by org-mode 6.18 in emacs 22</p>
251
+ </div></body>
252
+ </html>
@@ -0,0 +1,65 @@
1
+ #+TITLE: AutoPagerize for w3m
2
+ #+AUTHOR: rubikitch
3
+ #+EMAIL: rubikitch@ruby-lang.org
4
+ #+DATE: 2009-01-30
5
+ #+LANGUAGE: en
6
+ #+OPTIONS: H:3 num:t toc:t \n:nil @:t ::t |:t ^:t -:t f:t *:t TeX:t LaTeX:nil skip:nil d:nil todo:t pri:nil tags:not-in-toc
7
+ #+INFOJS_OPT: view:nil toc:nil ltoc:t mouse:underline buttons:0 path:http://orgmode.org/org-info.js
8
+ #+EXPORT_SELECT_TAGS: export
9
+ #+EXPORT_EXCLUDE_TAGS: noexport
10
+ #+LINK_UP:
11
+ #+LINK_HOME:
12
+ AutoPagerize for w3m http://rubikitchrb.rubyforge.org/
13
+
14
+ Copyright (c) 2009 rubikitch <rubikitch@ruby-lang.org> http://www.rubyist.net/~rubikitch/
15
+
16
+ Use and distribution subject to the terms of the Ruby license.
17
+ * Overview
18
+ AutoPagerize for w3m finds the next link and extracts the page contents. It
19
+ consists of dRuby server program (w3m-autopagerize-server.rb) and w3m
20
+ Local CGI program (next.cgi).
21
+
22
+ * Programs
23
+ ** w3m-autopagerize-server.rb
24
+ AutoPagerize for w3m uses dRuby server w3m-autopagerize-server.rb
25
+ because initializing site data is time-consuming. Before using
26
+ AutoPagerize for w3m, you have to invoke w3m-autopagerize-server.rb!
27
+ w3m-autopagerize-server.rb loads config file (~/.w3m-autopagerize.rb)
28
+ if any and reads AutoPagerize SITEINFO data from wedata.net by
29
+ default.
30
+
31
+ ** next.cgi
32
+ next.cgi is the Local CGI program that asks w3m-autopagerize-server.rb to get the next page.
33
+
34
+ ** config.sample.rb
35
+ The sample config file to customize. See file:config.sample.rb for detail.
36
+
37
+ * Installation
38
+ ** Install AutoPagerize for w3m
39
+ AutoPagerize for w3m works with Ruby 1.9 only! So, you have to install
40
+ Ruby 1.9.x. Then simply issue:
41
+
42
+ : $ sudo ruby1.9 -S gem install w3m-autopagerize
43
+
44
+ ** Copy config file
45
+ If you customize AutoPagerize for w3m, copy config.sample.rb to
46
+ ~/.w3m-autopagerize.rb and edit it.
47
+
48
+ ** Local CGI setup
49
+ Local CGI program next.cgi is installed at
50
+ /usr/local/bin/next.cgi. You have to make w3m find it. Add
51
+ /usr/local/bin to your Local CGI path (cgi_bin), or make a symlink.
52
+
53
+ : $ cd ~/w3m/cgi-bin; ln -s /usr/local/bin/next.cgi
54
+
55
+ ** Key bind
56
+ Bind AutoPagerize for w3m to your favorite key. Edit ~/.w3m/keymap and add this line.
57
+
58
+ : keymap x GOTO file:/cgi-bin/next.cgi
59
+
60
+ * Usage
61
+ - Press `x' key to go to next page.
62
+ - Press `=' key to see information, e.g. the XPath used to get the next page.
63
+
64
+ * License
65
+ AutoPagerize for w3m is licensed under the same terms as Ruby.
@@ -0,0 +1,166 @@
1
+ #!/usr/local/bin/ruby19
2
+ # -*- coding: utf-8 -*-
3
+ # (executable-interpret "ruby19 /m/home/rubikitch/w3m/cgi-bin/w3m-autopagerize/test-w3m-autopagerize.rb --no-use-color ")
4
+ require 'fileutils'
5
+ FileUtils.rm_f "test.log"
6
+
7
+ require 'test/unit'
8
+ require 'open-uri'
9
+ require 'script'
10
+ require 'w3m-autopagerize-server' # !> method redefined; discarding old debug_with_time
11
+
12
+ $TEST_MODE = true
13
+ $W3M_EXTRA_OPTIONS = "-o http_proxy=http://127.0.0.1:8339/"
14
+ $logger = Logger.new "test.log"
15
+ class TestAutoPagerize < Test::Unit::TestCase
16
+ def test_hatena_success
17
+ $logger.info "Test: #{__method__}"
18
+ reinit
19
+ defnext %r{^https?:\/\/(?:d2?|[^.]+\.g)\.hatena\.ne\.jp\/} do
20
+ exampleUrl %{http://os0x.g.hatena.ne.jp/os0x/}
21
+ pageElement %{id("days")}
22
+ nextLink %{//a[@rel="prev"]}
23
+ end
24
+
25
+ nexturl = "http://d.hatena.ne.jp/rubikitch/20090110/1231524557"
26
+ origurl = "http://d.hatena.ne.jp/rubikitch/20090113/1231844047"
27
+ np = Server.new.nextpage(origurl, nil, nil, Object.new)
28
+ assert_equal nexturl, np[:location]
29
+ assert_match(/<base/, np[:html])
30
+ assert_equal %{id("days")}, np[:pageElement]
31
+ assert_equal %{//a[@rel="prev"]}, np[:nextLink]
32
+ end
33
+
34
+ def test_hatena_fail
35
+ $logger.info "Test: #{__method__}"
36
+ reinit
37
+ defnext %r{^https?:\/\/(?:d2?|[^.]+\.g)\.hatena\.ne\.jp\/} do
38
+ exampleUrl %{http://os0x.g.hatena.ne.jp/os0x/}
39
+ pageElement %{id("noelement")}
40
+ nextLink %{//a[@rel="prev"]}
41
+ end
42
+
43
+ nexturl = "http://d.hatena.ne.jp/rubikitch/20090110/1231524557"
44
+ origurl = "http://d.hatena.ne.jp/rubikitch/20090113/1231844047"
45
+ np = Server.new.nextpage(origurl, nil, nil, Object.new)
46
+ assert_equal nexturl, np[:location]
47
+ assert_match(/failed to crop html/, np[:html])
48
+ assert_equal %{id("noelement")}, np[:pageElement]
49
+ assert_equal %{//a[@rel="prev"]}, np[:nextLink]
50
+ end
51
+
52
+ def test_google_addstring
53
+ $logger.info "Test: #{__method__}"
54
+ reinit
55
+ addstring %r!^http://www.google.(?:co.jp|com)/search!, '&start=100'
56
+
57
+ nexturl = "http://www.google.com/search?q=ruby&hl=ja&num=100&start=100"
58
+ origurl = "http://www.google.com/search?q=ruby&hl=ja&num=100"
59
+ np = Server.new.nextpage(origurl, nil, nil, Object.new)
60
+ assert_equal nexturl, np[:location]
61
+ end
62
+
63
+ def test_google_increment
64
+ reinit
65
+ increment %r!^http://www.google.(?:co.jp|com)/search.*start=(\d+)!, 100
66
+
67
+ nexturl = "http://www.google.com/search?q=ruby&hl=ja&num=100&start=200"
68
+ origurl = "http://www.google.com/search?q=ruby&hl=ja&num=100&start=100"
69
+ np = Server.new.nextpage(origurl, nil, nil, Object.new)
70
+ assert_equal nexturl, np[:location]
71
+ end
72
+
73
+ def test_google_fallback_link
74
+ $logger.info "Test: #{__method__}"
75
+ reinit
76
+ $FALLBACK_PATTERNS = %w[次へ]
77
+ $FALLBACK_WORDS = %w[次へ]
78
+ $SITEINFO = [[ /./, SiteData.fallbacks[0] ]]
79
+
80
+ nexturl = "http://www.google.com/search?num=100&hl=ja&pwst=1&q=ruby&start=100&sa=N"
81
+ origurl = "http://www.google.com/search?q=ruby&hl=ja&num=100"
82
+ np = Server.new.nextpage(origurl, nil, "UTF-8", Object.new)
83
+ assert_equal nexturl, np[:location]
84
+ end
85
+
86
+ def test_futaba_fallback_form
87
+ $logger.info "Test: #{__method__}"
88
+ reinit
89
+ $FALLBACK_WORDS = %w[次のページ]
90
+ $SITEINFO = [[ /./, SiteData.fallbacks[1] ]]
91
+
92
+ nexturl = "http://may.2chan.net/27/1.htm"
93
+ origurl = "http://may.2chan.net/27/futaba.htm"
94
+ np = Server.new.nextpage(origurl, nil, "cp932", Object.new)
95
+ assert_equal nexturl, np[:location]
96
+ end
97
+
98
+ def test_futaba_fallback_by_wrong_sitedata
99
+ $logger.info "Test: #{__method__}"
100
+ reinit
101
+
102
+ defnext %r{2chan} do
103
+ pageElement %{id("noelement")}
104
+ nextLink %{//a[@rel="prev"]}
105
+ end
106
+
107
+ $FALLBACK_WORDS = %w[次のページ]
108
+ SiteData.fallbacks.setup!
109
+
110
+ nexturl = "http://may.2chan.net/27/1.htm"
111
+ origurl = "http://may.2chan.net/27/futaba.htm"
112
+ np = Server.new.nextpage(origurl, nil, "cp932", Object.new)
113
+ assert_equal nexturl, np[:location]
114
+ end
115
+ end
116
+
117
+ class TestFallBackPredicate < Test::Unit::TestCase
118
+ def test_1
119
+ assert_equal '.="tugi" or contains(.,"Next")',
120
+ SiteData.fallback_predicate1(".", %w[tugi], %w[Next])
121
+ end
122
+ def test_2
123
+ assert_equal '.="tugi"', SiteData.fallback_predicate1(".", %w[tugi], [])
124
+ end
125
+ def test_3
126
+ assert_equal 'contains(.,"Next")', SiteData.fallback_predicate1(".", [], %w[Next])
127
+ end
128
+ end
129
+
130
+ class TestFallBackSiteData < Test::Unit::TestCase
131
+ def setup
132
+ SiteData.instance_eval { @fallbacks = nil }
133
+ end
134
+
135
+ def test_1
136
+ $FALLBACK_PATTERNS = %w[次へ]
137
+ $FALLBACK_WORDS = %w[次へ]
138
+ $FALLBACK_START_WORDS = %w[tugi]
139
+ assert_equal 4, SiteData.fallbacks.length
140
+ end
141
+
142
+ def test_2
143
+ $FALLBACK_PATTERNS = %w[次へ]
144
+ $FALLBACK_WORDS = %w[次へ]
145
+ $FALLBACK_START_WORDS = []
146
+ assert_equal 2, SiteData.fallbacks.length
147
+ end
148
+
149
+ def test_3
150
+ $FALLBACK_PATTERNS = %w[次へ]
151
+ $FALLBACK_WORDS = %w[次へ]
152
+ $FALLBACK_START_WORDS = %w[tugi]
153
+ assert SiteData.fallbacks.respond_to? :setup!
154
+ end
155
+
156
+
157
+ end
158
+
159
+
160
+ # >> Loaded suite -
161
+ # >> Started
162
+ # >> .....
163
+ # >>
164
+ # >> Finished in 1.154570634 seconds.
165
+ # >>
166
+ # >> 5 tests, 14 assertions, 0 failures, 0 errors, 0 pendings, 0 omissions, 0 notifications
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: w3m-autopagerize
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - rubikitch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-30 00:00:00 +09:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: AutoPagerize for w3m
17
+ email: rubikitch@ruby-lang.org
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - readme.org
26
+ - readme.html
27
+ - config.sample.rb
28
+ - bin/w3m-autopagerize-server.rb
29
+ - bin/next.cgi
30
+ - test/test-w3m-autopagerize.rb
31
+ has_rdoc: false
32
+ homepage: http://www.rubyist.net/~rubikitch/
33
+ post_install_message:
34
+ rdoc_options: []
35
+
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: "0"
43
+ version:
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ requirements: []
51
+
52
+ rubyforge_project: rubikitchrb
53
+ rubygems_version: 1.3.1
54
+ signing_key:
55
+ specification_version: 2
56
+ summary: AutoPagerize for w3m
57
+ test_files: []
58
+