w3m-autopagerize 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ #!/usr/local/bin/ruby19 -Ku
2
+ require 'drb'
3
+ require 'w3m-localcgi'
4
+ require 'kconv'
5
+
6
# Local CGI entry point: asks the w3m-autopagerize dRuby server for either a
# cropped view of the current page (QUERY_STRING == 'crop') or the next page,
# then prints the reply as a Local CGI response for w3m.
page_url = ENV['W3M_URL']

DRb.start_service
public :print # HACK to work w3mctl
server = DRbObject.new_with_uri "druby://:9322"
source_file = ENV['W3M_SOURCEFILE']
page_charset = ENV['W3M_CHARSET']
# The main object is passed by reference so the server can print
# W3m-Control headers through it (see the `public :print` hack above).
reply =
  if ENV['QUERY_STRING'] == 'crop'
    server.crop_this_page(page_url, source_file, page_charset, self.extend(DRbUndumped))
  else
    server.nextpage(page_url, source_file, page_charset, self.extend(DRbUndumped))
  end

next_link = reply[:nextLink]
if reply[:html]
  # An HTML document was produced: emit it with the AutoPagerize headers.
  puts "Content-Type: text/html"
  puts "W3m-AutoPagerize-NextLink: #{next_link}" if next_link
  puts "W3m-AutoPagerize-PageElement: #{reply[:pageElement]}" if reply[:pageElement]
  puts
  puts reply[:html]
else
  # No HTML: optionally tell w3m to go to the new location instead.
  if reply[:location]
    puts "W3m-Control: BACK"
    puts "W3m-Control: GOTO #{reply[:location]}"
    puts "W3m-AutoPagerize-NextLink: #{next_link}" if next_link
  end
  puts
end
@@ -0,0 +1,489 @@
1
+ #!/usr/local/bin/ruby19
2
+ # -*- coding: utf-8 -*-
3
+ # (executable-interpret "ruby19 /m/home/rubikitch/w3m/cgi-bin/w3m-autopagerize/test-w3m-autopagerize.rb --no-use-color ")
4
+ # (executable-interpret "rm -f /log/w3m-autopagerize.log; w3m-autopagerize-server.rb -r")
5
+ start_time = Time.now
6
+ #Encoding.default_internal = "UTF-8"
7
+ require 'kconv'
8
+ require 'uri'
9
+ require 'rubygems'
10
+ require 'nokogiri'
11
+ require 'pp'
12
+ require 'logger'
13
+ require 'tmpdir'
14
+ require 'json'
15
+
16
+ ###########################################################################
17
+ # Configurable Variables #
18
+ ###########################################################################
19
# Extra command line options appended to every w3m invocation.
$W3M_EXTRA_OPTIONS = ""
# see http://www.opera-wiki.com/index.php?FAQ%2F5.%E3%82%AB%E3%82%B9%E3%82%BF%E3%83%9E%E3%82%A4%E3%82%BA#k7bb0c80
# Substrings (partial match) that mark a link/button as "next".
$FALLBACK_PATTERNS = [
  "次へ", "次頁", "次ページ", "次項", "次の", "次を", "先へ", "つぎへ", "つぎの",
  "進む", "next", "もっと見る",
]
# Whole texts (full match) that mark a link/button as "next".
$FALLBACK_WORDS = ["次", "つぎ", "続きます", "keep reading", "[→]", "次一覧", "Older Entries"]
# Prefixes (prefix match) that mark a link/button as "next".
$FALLBACK_START_WORDS = [">", ">", "次", "つぎ", "Next", "NEXT", "next", "→"]
# Locations (URL or file name) of SITEINFO JSON data.
$SITEINFO_IMPORT_URLS = ["http://wedata.net/databases/AutoPagerize/items.json"]
# SITEINFO "url" values that are skipped on import.
$EXCLUDE_URLS = ['^https?:\/\/.']
# Log destination: an IO object or a file name.
$LOG_FILE = $stderr

###########################################################################
#                             DSL for nexturl                             #
###########################################################################
$TEST_MODE = false
# List of [url_or_pattern, SiteData] pairs, searched in order.
$SITEINFO = []
# Object used by w3mctl to print W3m-Control headers.
$client = nil
39
# One SITEINFO entry: how to find the next page of a site and which part of
# the page to crop.
#
# Members:
#   nextLink     - XPath selecting the link/form/button leading to the next page
#   insertBefore - SITEINFO field, stored but not used by this class
#   exampleUrl   - SITEINFO field, stored but not used by this class
#   pageElement  - XPath selecting the page content to crop
#   block        - optional callable run by #next_url as (uri, match)
#   match        - result of matching the current URL against the entry's pattern
class SiteData < Struct.new(:nextLink, :insertBefore, :exampleUrl, :pageElement,
                            :block, :match)
  # Mixed into the array returned by SiteData.fallbacks.
  module FallbackSetup
    # Appends every fallback to $SITEINFO as a catch-all (/./) entry.
    def setup!
      each { |fallback| $SITEINFO << [/./, fallback] }
    end
  end

  # Builds an XPath predicate that is true when +text+ equals one of +words+
  # or contains one of +patterns+. Returns "" when both lists are empty.
  def self.fallback_predicate1(text, words=$FALLBACK_WORDS, patterns=$FALLBACK_PATTERNS)
    exact   = Array(words).map    { |w| %Q!#{text}="#{w}"! }
    partial = Array(patterns).map { |w| %Q!contains(#{text},"#{w}")! }
    (exact + partial).join(' or ')
  end

  # Builds an XPath predicate that is true when +text+ starts with one of
  # +start_words+. Returns "" when the list is empty.
  def self.fallback_predicate2(text, start_words=$FALLBACK_START_WORDS)
    Array(start_words).map { |w| %Q!starts-with(#{text},"#{w}")! }.join(' or ')
  end

  # link to next
  #
  # Returns the memoized list of heuristic SiteData entries built from the
  # $FALLBACK_* variables. A group whose predicate would be empty is skipped,
  # because "//a[]" is not a valid XPath expression.
  def self.fallbacks
    @fallbacks ||= begin
      xpaths = []
      link_predicate = fallback_predicate1('.')
      unless link_predicate.empty?
        xpaths << "//a[#{link_predicate}]"
        xpaths << "//form[descendant::input[#{fallback_predicate1('@value')}]]"
      end
      unless $FALLBACK_START_WORDS.to_a.empty?
        xpaths << "//a[#{fallback_predicate2('.')}]"
        xpaths << "//form[descendant::input[#{fallback_predicate2('@value')}]]"
      end
      xpaths.map { |xpath| new(xpath) }.extend(FallbackSetup)
    end
  end

  # Make the DSL pretty!
  # Each member reader doubles as a writer: `nextLink '//a'` stores the value,
  # `nextLink` returns it. A nil/false argument always reads.
  members.each do |m|
    define_method(m) do |v = nil|
      if v
        self[m] = v
      else
        self[m]
      end
    end
  end

  # Computes the URL following +uri+.
  #
  # The block (if any) is run first with self set to this entry, so it may set
  # nextLink/pageElement through the DSL. When nextLink is set, the first node
  # it selects in the cached document supplies the target (href, action or
  # value attribute) and nil is returned if there is none. Without nextLink
  # the block's result is merged into +uri+.
  #
  # Returns a URI, or nil when the XPath finds no usable target.
  def next_url(uri)
    uri = URI(uri.to_s)
    result = instance_exec(uri, match, &block) if block
    xpath = nextLink
    unless xpath
      $logger.info "#{__method__}: result = #{result}"
      return uri.merge(result)
    end

    nokogiri = $nokogiri_cache[uri.to_s]
    $logger.info "#{__method__}: use xpath #{xpath}"
    nodes = nokogiri.xpath(xpath)
    node = nodes.first
    $logger.debug "#{__method__}: nodes.length = #{nodes.length}"
    nexturl = begin
      node["href"] || node["action"] || node["value"]
    rescue StandardError
      nil # best effort: no node matched, or the node has no attributes
    end
    $logger.info "#{__method__}: nexturl = #{nexturl or 'NOT FOUND'}"
    return nil unless nexturl

    # for some buggy sites not encoding spaces
    uri.merge(nexturl.gsub(/ /, '+'))
  end
end
113
+
114
# Registers a custom rule for +url_or_pattern+ (String or Regexp).
# When +nexturl+ is given it is used as the next location; otherwise the
# block +b+ is run as (uri, match) with self being the rule's SiteData, and
# a log line is written each time the rule is used.
def defnext(url_or_pattern, nexturl=nil, &b)
  defnext_(url_or_pattern, nexturl) { |uri, match|
    $logger.info "Use defnext for #{url_or_pattern}"
    instance_exec(uri, match, &b)
  }
end
120
+
121
# Low-level rule registration: appends [url_or_pattern, SiteData] to
# $SITEINFO. A given +nexturl+ wins over the block and is wrapped in a
# lambda that ignores its (uri, match) arguments.
def defnext_(url_or_pattern, nexturl=nil, &block)
  site = SiteData.new
  site.block = nexturl ? lambda { |u, m| nexturl } : block
  $SITEINFO << [url_or_pattern, site]
end
130
+
131
+
132
# Registers a rule whose next location is the current URL with +string+
# appended.
def addstring(url_or_pattern, string)
  defnext_(url_or_pattern) do |current, _match|
    $logger.info "Use addstring for #{url_or_pattern}"
    current.to_s + string
  end
end
138
+
139
# Registers a rule that adds +n+ to the number captured by the first group
# of +url_or_pattern+ and returns the URL with that number replaced.
# A captured number with a leading zero keeps its zero-padded width.
def increment(url_or_pattern, n=1)
  defnext_(url_or_pattern) do |u, m|
    $logger.info "Use increment for #{url_or_pattern}"
    url = u.to_s
    digits = m[1]
    bumped = digits.to_i + n
    replacement =
      if digits =~ /^0/
        format("%0#{digits.length}d", bumped)
      else
        bumped.to_s
      end
    url[m.begin(1)...m.end(1)] = replacement
    url
  end
end
152
+
153
# Prints one "W3m-Control:" header per string through $client.
# nil/false arguments are skipped and `true` prints two CRLFs instead of a
# header. Always returns nil.
def w3mctl(*strings)
  strings.each do |str|
    next unless str
    $client.print(str == true ? "\r\n\r\n" : "W3m-Control: #{str}\r\n")
  end
  nil
end
165
+
166
+ ###########################################################################
167
+ # File.zread #
168
+ ###########################################################################
169
+ require 'zlib'
170
+
171
# First two bytes of a gzip stream, as a binary string.
Zlib::GZIP_MAGIC = [0x1f, 0x8b].pack("C*")

# Reads +file+ and returns its content, transparently gunzipping it when it
# starts with the gzip magic bytes.
#
# The file is opened with File.open in binary mode. Kernel#open must not be
# used here: it treats a path starting with "|" as a command to run, and the
# path can come from the environment (W3M_SOURCEFILE).
#
# Raises SystemCallError (e.g. Errno::ENOENT) when the file cannot be opened.
def File.zread(file)
  File.open(file, "rb") do |f|
    magic = f.read(2) # nil for an empty file
    f.rewind
    if magic == Zlib::GZIP_MAGIC
      Zlib::GzipReader.wrap(f) { |gz| gz.read }
    else
      f.read
    end
  end
end
187
+
188
+ ###########################################################################
189
+ # content cache #
190
+ ###########################################################################
191
TMPFILE = Dir.tmpdir + "/w3m-autopagerize.tmp.html"
# url => [binary source, charset name]; a miss fetches the page.
$content_cache = Hash.new do |h,url|
  $logger.debug "cache miss: set $content_cache[#{url.inspect}]"
  # use w3m to pass cookie
  header, source = get_header_and_content(url)
  $logger.debug "cache miss: source is html? = #{source =~ /<body/i and true}"
  # Take only the charset token: case-insensitive, without surrounding quotes,
  # trailing "\r" or following parameters. Fall back to guessing from content.
  declared = header.to_s[/charset=["']?([^\s"';]+)/i, 1]
  charset = normalize_charset(declared || Kconv.guess(source))
  source.force_encoding("ASCII-8BIT")
  h[url] = [source, charset]
end

# BUG: libxml2 cannot handle id() function without doctype.
# http://labs.gmo.jp/blog/ku/2008/07/libxmlhtmlxpathid.html
DOCTYPE = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'
# url => parsed Nokogiri document, built from $content_cache on a miss.
$nokogiri_cache = Hash.new do |h,url|
  $logger.debug "cache miss: set $nokogiri_cache[#{url.inspect}]"
  source, charset = $content_cache[url]
  h[url] = Nokogiri::HTML(DOCTYPE+source, nil, charset)
end
210
+
211
+
212
+ ###########################################################################
213
+ # Utilities #
214
+ ###########################################################################
215
+ def get_header_and_content(url)
216
+ output = `w3m #$W3M_EXTRA_OPTIONS -dump_both -o accept_encoding='gzip' "#{url}"`
217
+ header, source = output.force_encoding("ASCII-8BIT").split(/\n\n/, 2)
218
+ open(TMPFILE,"wb"){|f| f.write source}
219
+ source = File.zread(TMPFILE).force_encoding "ASCII-8BIT"
220
+ [header, source]
221
+ ensure
222
+ File.unlink TMPFILE if File.exist? TMPFILE
223
+ end
224
+
225
# Returns the content of +url+ as a binary string: http(s) URLs are fetched
# through w3m, anything else is read as a (possibly gzipped) local file.
def get_content(url)
  return get_header_and_content(url)[1] if url =~ /^https?:/

  # local file
  File.zread(File.expand_path(url)).force_encoding "ASCII-8BIT"
end
232
+
233
# Returns +charset+ as a String, mapping "shift_jis" (any letter case) to
# "cp932".
# FIXME I do not know other charsets than Japanese.
def normalize_charset(charset)
  name = charset.to_s
  return "cp932" if name.downcase == "shift_jis"
  name
end
238
+
239
+
240
# Discards every registered rule by replacing $SITEINFO with a new empty list.
def reinit
  $SITEINFO = Array.new
end
243
+
244
+ # unless "".respond_to? :force_encoding # for ruby 1.8
245
+ # class String
246
+ # def force_encoding(args); self end
247
+ # def encoding; Kconv.guess(self) end
248
+ # end
249
+ # end
250
+
251
+ ###########################################################################
252
+ # Entry Points #
253
+ ###########################################################################
254
# Front object served over dRuby. next.cgi calls #nextpage and
# #crop_this_page; the startup code calls #load_config_file and
# #load_siteinfo.
class Server
  # Looks up the first $SITEINFO entry whose pattern matches +url+ (Regexp
  # match, or string equality for anything else), stores the match result in
  # the entry and returns the entry.
  #
  # Raises RuntimeError when no entry matches.
  def sitedata(url)
    url = url.to_s
    match = nil
    entry = $SITEINFO.find do |pattern, _data|
      match = pattern.is_a?(Regexp) ? url.match(pattern) : url == pattern.to_s
    end
    raise "no SITEINFO entry matches #{url}" unless entry
    data = entry[1]
    data.match = match
    data
  end
  private :sitedata

  HTML_OUTPUT_FILE = "/tmp/w3m-autopagerize-tmp.html"

  # Builds the page shown in w3m for +location+: the nodes selected by
  # sitedata.pageElement wrapped in a small HTML document. When cropping
  # fails, the whole <body> is used and an error note is prepended.
  #
  # Returns a Hash with :html, :location, :pageElement and :nextLink.
  def crop_html(location, prev_url, sitedata)
    $logger.debug "#{__method__}: url = #{location}"
    location = location.to_s
    nokogiri = $nokogiri_cache[location]
    title = begin
      nokogiri.at("//title").to_html
    rescue StandardError
      "<title></title>" # page without a <title>
    end
    begin
      $logger.info "#{__method__}: use xpath #{sitedata.pageElement}"
      nodes = nokogiri.xpath(sitedata.pageElement)
      $logger.debug "#{__method__}: nodes.length = #{nodes.length}"
      html_piece = nodes.to_html
      raise "pageElement matched nothing" if html_piece.strip.empty?
    rescue
      $logger.error "#{__method__}: failed to crop!"
      html_piece = nokogiri.at("body").to_html
      errmsg = %{<p>w3m-autopagerize failed to crop html but next url is found.<br />
xpath = #{sitedata.pageElement || 'pageElement not found'}
</p>
<hr>}
    else
      errmsg = ""
    end
    # BUG: Nokogiri emits superfluous &#13;.
    html_piece = html_piece.gsub(/&#13;/, '') # hack
    # BUG: w3m cannot handle <script />, so replace it with <script></script>
    html_piece = html_piece.gsub(%r!(<script.+?)/>!, '\1></script>') # hack
    html = %{<html>
<head><base href="#{location}" />#{title}
<link rel="w3m-autopagerize-orig" href="#{location}" />
<link rel="w3m-autopagerize-prev" href="#{prev_url}" />
</head>
<body>
Original URL: <a href="#{location}">#{location}</a><br>
#{errmsg}
#{html_piece}
</body></html> }

    { :html => html, :location => location,
      :pageElement => sitedata.pageElement, :nextLink => sitedata.nextLink}
  end
  private :crop_html

  # Warms the caches in a background thread with the page that follows
  # +location+, then drops the oldest entry of each cache once it holds more
  # than three. Failures inside the thread are logged, never raised.
  def prefetch_next_location(location, sitedata)
    Thread.start do
      begin
        $logger.debug "#{__method__}: #{location}"
        newloc = sitedata.next_url(location)
        $logger.debug "#{__method__}: new location: #{newloc}"
        $nokogiri_cache[newloc.to_s] if newloc
      rescue StandardError => e
        $logger.error "#{__method__}: #{e.class}: #{e.message}"
      end
    end
    # It uses Ordered Hash in Ruby 1.9
    [$nokogiri_cache, $content_cache].each do |hash|
      hash.delete hash.first[0] if hash.length > 3
    end
  end
  private :prefetch_next_location

  # Crops +location+ and starts the prefetch afterwards, whether or not
  # cropping succeeded.
  def crop_and_prefetch(location, prev_url, sitedata)
    crop_html location, prev_url, sitedata
  ensure
    prefetch_next_location location, sitedata
  end
  private :crop_and_prefetch

  # Best-effort encoding guess for the error page; never raises and never
  # triggers a fetch.
  def guess_source_encoding(src, url)
    source = src || ($content_cache.key?(url) ? $content_cache[url].first : nil)
    source ? Kconv.guess(source) : "unknown"
  rescue StandardError
    "unknown"
  end
  private :guess_source_encoding

  # Common request setup: logs the request, remembers +client+ in $client and
  # reads W3M_SOURCEFILE. For a Local CGI page the real URL is taken from the
  # page's <base href>; otherwise the source is stored in $content_cache.
  #
  # Returns [src, url]. Raises ArgumentError for a Local CGI URL without a
  # source file.
  def prepare(url, srcfile, charset, client, method)
    $logger.info "=================================================="
    $logger.info "#{method}: entered url=#{url} charset=#{charset}"
    $logger.debug "#{method}: W3M_SOURCEFILE = #{srcfile}" if srcfile
    $client = client

    src = File.zread(srcfile).force_encoding("ASCII-8BIT") if srcfile
    if url =~ /^file:.*\/cgi-bin\// # from Local CGI
      raise ArgumentError, "#{method}: W3M_SOURCEFILE is required for #{url}" unless src
      url = src[%r!<base href=['"](.+?)['"]!, 1] # '"
      $logger.info "#{method}: base url=#{url}"
    else # from W3M_SOURCEFILE
      $logger.debug "#{method}: set $content_cache[#{url.inspect}] from W3M_SOURCEFILE"
      $logger.debug "#{method}: source is html? = #{src =~ /<body/i and true}"
      $content_cache[url] = [src, normalize_charset(charset)] if src
    end
    [ src, url ]
  end

  # Returns the cropped version of the current page (see #crop_html).
  # Prefetching is best effort: a failure to compute the next location is
  # logged and does not discard the cropped page.
  def crop_this_page(url, srcfile, charset, client)
    _src, url = prepare(url, srcfile, charset, client, __method__)

    site = sitedata(url)
    begin
      crop_html url, nil, site
    ensure
      begin
        upcoming = site.next_url(url)
        prefetch_next_location upcoming, site if upcoming
      rescue StandardError => e
        $logger.error "#{__method__}: prefetch skipped: #{e.message}"
      end
    end
  end

  # Returns the cropped next page of +url+. The matching SITEINFO entry is
  # tried first, then each heuristic fallback in order. Any error is turned
  # into a Hash whose :html is a diagnostic page.
  def nextpage(url, srcfile, charset, client)
    src, url = prepare(url, srcfile, charset, client, __method__)
    site = sitedata(url)
    location = site.next_url(url)

    if location
      if site.pageElement
        $logger.debug "#{__method__}: location and pageElement found."
      else
        $logger.debug "#{__method__}: location found."
      end
      crop_and_prefetch location, url, site
    else
      # Explicit search: a `for` loop evaluates to the collection itself when
      # it finishes without `break`, which would look like a found location.
      fallback = nil
      fallback_nexturl = nil
      SiteData.fallbacks.each do |candidate|
        found = candidate.next_url(url)
        next unless found
        fallback = candidate
        fallback_nexturl = found
        break
      end
      unless fallback_nexturl
        $logger.debug "#{__method__}: no location."
        raise "no location!"
      end
      $logger.info "#{__method__}: fallback"
      crop_and_prefetch fallback_nexturl, url, fallback
    end
  rescue => e
    html = %{<pre>Error!
xpath = #{(site && site.nextLink) || 'nextLink not found'}
#{e}
#{e.backtrace.pretty_inspect}
src_encoding=#{guess_source_encoding(src, url)}
</pre>
}
    $logger.error "#{__method__}: error!"
    $logger.error "#{__method__}: #{e}"
    $logger.error "#{__method__}: #{e.backtrace.pretty_inspect}"
    {:html => html}
  end


  # (executable-interpret "ruby19 -r w3m-autopagerize-server -e '$logger=Logger.new(); load_siteinfo'")
  # (executable-interpret "ruby18 -r w3m-autopagerize-server -e '$logger=Logger.new(); load_siteinfo'")
  #
  # Imports SITEINFO entries from every location in $SITEINFO_IMPORT_URLS and
  # appends them to $SITEINFO. Entries without "data"/"url" are skipped, as
  # are URLs listed in $EXCLUDE_URLS (compared both verbatim and with "\/"
  # read as "/") and entries whose URL is not a valid Ruby regexp.
  def load_siteinfo
    unescape_slashes = lambda { |pattern| pattern.to_s.gsub('\/', '/') }
    excluded = $EXCLUDE_URLS.map { |pattern| unescape_slashes.call(pattern) }
    $SITEINFO_IMPORT_URLS.each do |siteinfo_url|
      JSON.parse(get_content(siteinfo_url).toutf8).each do |hash|
        data = hash["data"]
        url = data && data["url"]
        next unless url
        next if $EXCLUDE_URLS.include?(url) || excluded.include?(unescape_slashes.call(url))
        begin
          pattern = Regexp.new(url)
        rescue RegexpError => e
          $logger.error "#{__method__}: skipped invalid url pattern #{url.inspect}: #{e.message}"
          next
        end
        sd = SiteData.new data["nextLink"], data["insertBefore"],
                          data["exampleUrl"], data["pageElement"]
        $SITEINFO << [pattern, sd]
      end
    end
    $logger.info "#{__method__}: loaded"
  end

  # Replaces the running server process with a fresh one.
  def restart
    exec $0
  end

  # Loads the Ruby config file +config_file+ (relative paths are resolved
  # against this file's directory), or does nothing for :ignore. Errors in
  # the config file (including syntax errors) are reported on stderr and not
  # raised; process-control exceptions such as SystemExit are not swallowed.
  def load_config_file(config_file)
    if config_file == :ignore
      $stderr.puts "load_config_file: config file is ignored!"
      return
    end
    config_file = File.expand_path(config_file, File.dirname(__FILE__))
    if File.file? config_file
      load(config_file)
      $stderr.puts "load_config_file: loaded #{config_file}"
    else
      $stderr.puts "load_config_file: config file #{config_file} not found!"
    end
  rescue StandardError, ScriptError => e
    $stderr.puts "load_config_file: error loading #{config_file}!"
    $stderr.puts "load_config_file: #{e.class}: #{e.message}"
  end
end
446
+
447
+
448
# Server startup: parse the command line, load the config file and SITEINFO,
# register the fallbacks and serve the Server object over dRuby.
if __FILE__==$0
  require 'optparse'
  require 'drb'
  conf = Struct.new(:log_file, :siteinfo_url, :config_file).new
  conf.config_file = File.expand_path "~/.w3m-autopagerize.rb"
  ARGV.options do |opts|
    opts.on("-l", "--log LOGFILE", "Use log file.") do |path|
      conf.log_file = File.expand_path(path, File.dirname(__FILE__))
    end
    opts.on("-s", "--siteinfo URL", "URL of JSON data (SITEINFO).") do |location|
      conf.siteinfo_url = location
    end
    opts.on("-c", "--config CONFIG", "Use config file.") do |path|
      conf.config_file = path
    end
    opts.on("-f", "Ignore config file.") do |_flag|
      conf.config_file = :ignore
    end
    opts.on("-r", "--restart", "--reload", "Restart the server.") do |_flag|
      # Ask the running server to re-exec itself; errors are ignored.
      DRbObject.new_with_uri(%q!druby://:9322!).restart rescue nil
      puts "w3m-autopagerize-server restarted."
      exit
    end
    opts.parse!
  end

  srv = Server.new
  srv.load_config_file(conf.config_file)
  # A log file given on the command line wins over the config file.
  $LOG_FILE = conf.log_file || $LOG_FILE
  $stderr.puts "startup: log file = #{$LOG_FILE.inspect}"
  $logger = Logger.new($LOG_FILE)
  $SITEINFO_IMPORT_URLS = [ conf.siteinfo_url ] if conf.siteinfo_url
  $logger.info "$SITEINFO_IMPORT_URLS = #{$SITEINFO_IMPORT_URLS.inspect}"

  srv.load_siteinfo
  GC.start
  SiteData.fallbacks.setup!

  $stderr.puts "start w3m-autopagerize-server.rb (#{Time.now-start_time} secs)"

  # Run the garbage collector every five minutes.
  Thread.start do
    loop do
      sleep 300
      GC.start
    end
  end

  DRb.start_service("druby://:9322", srv)
  DRb.thread.join
end
@@ -0,0 +1,130 @@
1
+ # -*- coding: utf-8 -*-
2
+ # This file shows default setting. If you customize w3m-autopagerize,
3
+ # copy this file to ~/.w3m-autopagerize.rb.
4
+
5
+ # Extra options of w3m to fetch web page.
6
+ $W3M_EXTRA_OPTIONS = ""
7
+
8
+ # SITEINFO location. Set URL or filename of SITEINFO JSON data.
9
+ $SITEINFO_IMPORT_URLS = %w[
10
+ http://wedata.net/databases/AutoPagerize/items.json
11
+ ]
12
+ # Disable SITEINFO entries. The default is to ignore `"url": "^https?:\/\/."' entry.
13
+ $EXCLUDE_URLS = %w[
14
+ ^https?:\/\/.
15
+ ]
16
+
17
+ # Log file location
18
+ # =================
19
+ #
20
+ # The default destination of the log is stderr.
21
+ $LOG_FILE = $stderr
22
+ # If you use a log file, uncomment this. Note that the default
23
+ # directory of log file is the directory of w3m-autopagerize-server.rb.
24
+
25
+ # $LOG_FILE = "w3m-autopagerize.log"
26
+
27
+ # Fallback patterns
28
+ # =================
29
+ #
30
+ # If w3m-autopagerize cannot find the next location (i.e. the SITEINFO
31
+ # entry is wrong or missing), it uses a heuristic based on the $FALLBACK_*
32
+ # variables to find it. This is similar to Opera's FastForward.
33
+ #
34
+ # Links/buttons whose text is "next" or "keep reading" (full match) are
35
+ # considered as next location.
36
+ $FALLBACK_WORDS = %w[次 つぎ 続きます keep\ reading [→] 次一覧 Older\ Entries next Next NEXT]
37
+ # Links/buttons whose text starts with ">" (prefix match) are
38
+ # considered as next location.
39
+ $FALLBACK_START_WORDS = %w[> >]
40
+ # Links/buttons whose text contains one of these patterns (partial match)
41
+ # are considered as next location.
42
+ $FALLBACK_PATTERNS = %w[次へ 次頁 次ページ 次項 次の 次を 先へ つぎへ つぎの 進む もっと見る ]
43
+
44
+ # Custom Location
45
+ # ===============
46
+ #
47
+ # You write `next' pages by URL rule. Use `addstring' and `increment'
48
+ # function. It is handy method to specify next location.
49
+ # It requires NO XPATH KNOWLEDGE, but some Regexp knowledge:-)
50
+ #
51
+ # Custom locations take precedence over SITEINFO. This means that even
52
+ # if SITEINFO defines the configuration of a site, the custom location
53
+ # is used.
54
+ #
55
+ # For example, The next page of "http://www.dotup.org/" is
56
+ # "http://www.dotup.org/2.html". Use simply `addstring' function.
57
+ #
58
+ # The next page of "http://www.dotup.org/2.html" is
59
+ # "http://www.dotup.org/3.html". Use `increment' function with Regexp.
60
+ # The number captured by the first group "(\d+)" (digits) is replaced with the next number.
61
+ # Note that writing a URL Regexp by %r!URL Regexp! is handy.
62
+ addstring "http://www.dotup.org/", "2.html"
63
+ increment %r!http://www.dotup.org/(\d+).html$!
64
+ #
65
+ # `increment' can add any integer. For example,
66
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja&start=20"
67
+ # to
68
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja&start=40".
69
+ increment %r!^http://images.google.(?:co.jp|com)/.*start=(\d+)!, 20
70
+ #
71
+ # `addstring' function can accept Regexp. For example,
72
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja"
73
+ # to
74
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja&start=20".
75
+ #
76
+ # Note that `increment' of google image search must be defined BEFORE
77
+ # `addstring'. If `addstring' is before `increment', w3m-autopagerize
78
+ # considers the next page of
79
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja&start=20"
80
+ # as
81
+ # "http://images.google.co.jp/images?q=ruby&ie=Shift_JIS&hl=ja&start=20&start=20".
82
+ # It is because the URL matches both
83
+ # %r!^http://images.google.(?:co.jp|com)/! and
84
+ # %r!^http://images.google.(?:co.jp|com)/.*start=(\d+)!.
85
+ addstring %r!^http://images.google.(?:co.jp|com)/!, '&start=20'
86
+
87
+ # Custom Action
88
+ # =============
89
+ #
90
+ # You can execute any w3m commands for a certain URL. For example, when I
90
+ # (rubikitch) log in to Hatena and open my Hatena diary, I execute
91
+ # "GOTO http://d.hatena.ne.jp/rubikitch/" once and "DELETE_PREVBUF"
92
+ # three times. Use `defnext' and `w3mctl' for this.
94
+ #
95
+ # This is a good example of login and goto action. Note that when you
96
+ # use login and goto, you must set your login/password to
97
+ # ~/.w3m/pre_form file.
98
+ defnext "https://www.hatena.ne.jp/login" do
99
+ w3mctl "GOTO http://d.hatena.ne.jp/rubikitch/", "DELETE_PREVBUF", "DELETE_PREVBUF", "DELETE_PREVBUF"
100
+ end
101
+
102
+ # Custom SITEINFO
103
+ # ===============
104
+ #
105
+ # If you have your original SITEINFO for AutoPagerize, you can simply
106
+ # add the URL or filename into the top of $SITEINFO_IMPORT_URLS.
107
+ #
108
+ # The SITEINFO can be defined in Ruby DSL.
109
+ #
110
+ # In JSON:
111
+ #
112
+ # {
113
+ # "name": "(.~) what a quiet stiff (~.)",
114
+ # "data": {
115
+ # "insertBefore": "",
116
+ # "pageElement": "id(\"pixflow\")",
117
+ # "url": "^http:\/\/whytheluckystiff\\.net\/quiet\/",
118
+ # "nextLink": "id(\"header\")\/a[last()]",
119
+ # "exampleUrl": "http:\/\/whytheluckystiff.net\/quiet\/"
120
+ # }
121
+ # }
122
+ #
123
+ # In Ruby:
124
+ #
125
+ # defnext %r!^http://whytheluckystiff\.net/quiet/! do
126
+ # insertBefore ''
127
+ # pageElement 'id("pixflow")'
128
+ # nextLink 'id("header")/a[last()]'
129
+ # exampleUrl 'http://whytheluckystiff.net/quiet/'
130
+ # end
@@ -0,0 +1,252 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml"
4
+ lang="en" xml:lang="en">
5
+ <head>
6
+ <title>AutoPagerize for w3m</title>
7
+ <meta http-equiv="Content-Type" content="text/html;charset=euc-jp"/>
8
+ <meta name="generator" content="Org-mode"/>
9
+ <meta name="generated" content="2009-01-30"/>
10
+ <meta name="author" content="rubikitch"/>
11
+ <style type="text/css">
12
+ <!--/*--><![CDATA[/*><!--*/
13
+ html { font-family: Times, serif; font-size: 12pt; }
14
+ .title { text-align: center; }
15
+ .todo { color: red; }
16
+ .done { color: green; }
17
+ .tag { background-color:lightblue; font-weight:normal }
18
+ .target { }
19
+ .timestamp { color: grey }
20
+ .timestamp-kwd { color: CadetBlue }
21
+ p.verse { margin-left: 3% }
22
+ pre {
23
+ border: 1pt solid #AEBDCC;
24
+ background-color: #F3F5F7;
25
+ padding: 5pt;
26
+ font-family: courier, monospace;
27
+ font-size: 90%;
28
+ overflow:auto;
29
+ }
30
+ table { border-collapse: collapse; }
31
+ td, th { vertical-align: top; }
32
+ dt { font-weight: bold; }
33
+ div.figure { padding: 0.5em; }
34
+ div.figure p { text-align: center; }
35
+ .linenr { font-size:smaller }
36
+ .code-highlighted {background-color:#ffff00;}
37
+ .org-info-js_info-navigation { border-style:none; }
38
+ #org-info-js_console-label { font-size:10px; font-weight:bold;
39
+ white-space:nowrap; }
40
+ .org-info-js_search-highlight {background-color:#ffff00; color:#000000;
41
+ font-weight:bold; }
42
+ /*]]>*/-->
43
+ </style>
44
+ <script type="text/javascript">
45
+ <!--/*--><![CDATA[/*><!--*/
46
+ function CodeHighlightOn(elem, id)
47
+ {
48
+ var target = document.getElementById(id);
49
+ if(null != target) {
50
+ elem.cacheClassElem = elem.className;
51
+ elem.cacheClassTarget = target.className;
52
+ target.className = "code-highlighted";
53
+ elem.className = "code-highlighted";
54
+ }
55
+ }
56
+ function CodeHighlightOff(elem, id)
57
+ {
58
+ var target = document.getElementById(id);
59
+ if(elem.cacheClassElem)
60
+ elem.className = elem.cacheClassElem;
61
+ if(elem.cacheClassTarget)
62
+ target.className = elem.cacheClassTarget;
63
+ }
64
+ /*]]>*/-->
65
+ </script>
66
+ </head><body>
67
+ <h1 class="title">AutoPagerize for w3m</h1>
68
+
69
+ <p>AutoPagerize for w3m <a href="http://rubikitchrb.rubyforge.org/">http://rubikitchrb.rubyforge.org/</a>
70
+ </p>
71
+ <p>
72
+ Copyright (c) 2009 rubikitch &lt;rubikitch@ruby-lang.org&gt; <a href="http://www.rubyist.net/~rubikitch/">http://www.rubyist.net/~rubikitch/</a>
73
+ </p>
74
+ <p>
75
+ Use and distribution subject to the terms of the Ruby license.
76
+ </p>
77
+ <div id="table-of-contents">
78
+ <h2>Table of Contents</h2>
79
+ <div id="text-table-of-contents">
80
+ <ul>
81
+ <li><a href="#sec-1">1 Overview </a></li>
82
+ <li><a href="#sec-2">2 Programs </a>
83
+ <ul>
84
+ <li><a href="#sec-2.1">2.1 w3m-autopagerize-server.rb </a></li>
85
+ <li><a href="#sec-2.2">2.2 next.cgi </a></li>
86
+ <li><a href="#sec-2.3">2.3 config.sample.rb </a></li>
87
+ </ul>
88
+ </li>
89
+ <li><a href="#sec-3">3 Installation </a>
90
+ <ul>
91
+ <li><a href="#sec-3.1">3.1 Install AutoPagerize for w3m </a></li>
92
+ <li><a href="#sec-3.2">3.2 Copy config file </a></li>
93
+ <li><a href="#sec-3.3">3.3 Local CGI setup </a></li>
94
+ <li><a href="#sec-3.4">3.4 Key bind </a></li>
95
+ </ul>
96
+ </li>
97
+ <li><a href="#sec-4">4 Usage </a></li>
98
+ <li><a href="#sec-5">5 License </a></li>
99
+ </ul>
100
+ </div>
101
+ </div>
102
+
103
+ <div id="outline-container-1" class="outline-2">
104
+ <h2 id="sec-1">1 Overview </h2>
105
+ <div id="text-1">
106
+
107
+ <p>AutoPagerize for w3m finds next link and extracts page contents. It
108
+ consists of dRuby server program (w3m-autopagerize-server.rb) and w3m
109
+ Local CGI program (next.cgi).
110
+ </p>
111
+ </div>
112
+
113
+ </div>
114
+
115
+ <div id="outline-container-2" class="outline-2">
116
+ <h2 id="sec-2">2 Programs </h2>
117
+ <div id="text-2">
118
+
119
+
120
+ </div>
121
+
122
+ <div id="outline-container-2.1" class="outline-3">
123
+ <h3 id="sec-2.1">2.1 w3m-autopagerize-server.rb </h3>
124
+ <div id="text-2.1">
125
+
126
+ <p>AutoPagerize for w3m uses dRuby server w3m-autopagerize-server.rb
127
+ because initializing site data is time-consuming. Before using
128
+ AutoPagerize for w3m, you have to invoke w3m-autopagerize-server.rb!
129
+ w3m-autopagerize-server.rb loads config file (~/.w3m-autopagerize.rb)
130
+ if any and reads AutoPagerize SITEINFO data from wedata.net by
131
+ default.
132
+ </p>
133
+ </div>
134
+
135
+ </div>
136
+
137
+ <div id="outline-container-2.2" class="outline-3">
138
+ <h3 id="sec-2.2">2.2 next.cgi </h3>
139
+ <div id="text-2.2">
140
+
141
+ <p>next.cgi is a Local CGI program that asks w3m-autopagerize-server.rb for the next page.
142
+ </p>
143
+ </div>
144
+
145
+ </div>
146
+
147
+ <div id="outline-container-2.3" class="outline-3">
148
+ <h3 id="sec-2.3">2.3 config.sample.rb </h3>
149
+ <div id="text-2.3">
150
+
151
+ <p>This is the sample config file to customize. See <a href="config.sample.rb">file:config.sample.rb</a> for details.
152
+ </p>
153
+ </div>
154
+ </div>
155
+
156
+ </div>
157
+
158
+ <div id="outline-container-3" class="outline-2">
159
+ <h2 id="sec-3">3 Installation </h2>
160
+ <div id="text-3">
161
+
162
+
163
+ </div>
164
+
165
+ <div id="outline-container-3.1" class="outline-3">
166
+ <h3 id="sec-3.1">3.1 Install AutoPagerize for w3m </h3>
167
+ <div id="text-3.1">
168
+
169
+ <p>AutoPagerize for w3m works with Ruby 1.9 only! So, you have to install
170
+ Ruby 1.9.x. Then simply issue:
171
+ </p>
172
+ <pre class="example">
173
+ $ sudo ruby1.9 -S gem install w3m-autopagerize
174
+ </pre>
175
+
176
+ </div>
177
+
178
+ </div>
179
+
180
+ <div id="outline-container-3.2" class="outline-3">
181
+ <h3 id="sec-3.2">3.2 Copy config file </h3>
182
+ <div id="text-3.2">
183
+
184
+ <p>If you customize AutoPagerize for w3m, copy config.sample.rb to
185
+ ~/.w3m-autopagerize.rb and edit it.
186
+ </p>
187
+ </div>
188
+
189
+ </div>
190
+
191
+ <div id="outline-container-3.3" class="outline-3">
192
+ <h3 id="sec-3.3">3.3 Local CGI setup </h3>
193
+ <div id="text-3.3">
194
+
195
+ <p>Local CGI program next.cgi is installed at
196
+ /usr/local/bin/next.cgi. You have to make w3m find it. Add
197
+ /usr/local/bin to your Local CGI path (cgi<sub>bin</sub>), or make symlink.
198
+ </p>
199
+ <pre class="example">
200
+ $ cd ~/w3m/cgi-bin; ln -s /usr/local/bin/next.cgi
201
+ </pre>
202
+
203
+ </div>
204
+
205
+ </div>
206
+
207
+ <div id="outline-container-3.4" class="outline-3">
208
+ <h3 id="sec-3.4">3.4 Key bind </h3>
209
+ <div id="text-3.4">
210
+
211
+ <p>Bind AutoPagerize for w3m to your favorite key. Edit ~/.w3m/keymap and add this line.
212
+ </p>
213
+ <pre class="example">
214
+ keymap x GOTO file:/cgi-bin/next.cgi
215
+ </pre>
216
+
217
+ </div>
218
+ </div>
219
+
220
+ </div>
221
+
222
+ <div id="outline-container-4" class="outline-2">
223
+ <h2 id="sec-4">4 Usage </h2>
224
+ <div id="text-4">
225
+
226
+ <ul>
227
+ <li>
228
+ Press `x' key to go to next page.
229
+ </li>
230
+ <li>
231
+ Press `=' key to see information, eg. XPath to get next page.
232
+
233
+ </li>
234
+ </ul>
235
+ </div>
236
+
237
+ </div>
238
+
239
+ <div id="outline-container-5" class="outline-2">
240
+ <h2 id="sec-5">5 License </h2>
241
+ <div id="text-5">
242
+
243
+ <p>AutoPagerize for w3m is licensed under the same terms as Ruby.
244
+ </p></div>
245
+ </div>
246
+ <div id="postamble"><p class="author"> Author: rubikitch
247
+ <a href="mailto:rubikitch@ruby-lang.org">&lt;rubikitch@ruby-lang.org&gt;</a>
248
+ </p>
249
+ <p class="date"> Date: 2009-01-30</p>
250
+ <p>HTML generated by org-mode 6.18 in emacs 22</p>
251
+ </div></body>
252
+ </html>
@@ -0,0 +1,65 @@
1
+ #+TITLE: AutoPagerize for w3m
2
+ #+AUTHOR: rubikitch
3
+ #+EMAIL: rubikitch@ruby-lang.org
4
+ #+DATE: 2009-01-30
5
+ #+LANGUAGE: en
6
+ #+OPTIONS: H:3 num:t toc:t \n:nil @:t ::t |:t ^:t -:t f:t *:t TeX:t LaTeX:nil skip:nil d:nil todo:t pri:nil tags:not-in-toc
7
+ #+INFOJS_OPT: view:nil toc:nil ltoc:t mouse:underline buttons:0 path:http://orgmode.org/org-info.js
8
+ #+EXPORT_SELECT_TAGS: export
9
+ #+EXPORT_EXCLUDE_TAGS: noexport
10
+ #+LINK_UP:
11
+ #+LINK_HOME:
12
+ AutoPagerize for w3m http://rubikitchrb.rubyforge.org/
13
+
14
+ Copyright (c) 2009 rubikitch <rubikitch@ruby-lang.org> http://www.rubyist.net/~rubikitch/
15
+
16
+ Use and distribution subject to the terms of the Ruby license.
17
+ * Overview
18
+ AutoPagerize for w3m finds the next-page link and extracts the page contents. It
19
+ consists of a dRuby server program (w3m-autopagerize-server.rb) and a w3m
20
+ Local CGI program (next.cgi).
21
+
22
+ * Programs
23
+ ** w3m-autopagerize-server.rb
24
+ AutoPagerize for w3m uses dRuby server w3m-autopagerize-server.rb
25
+ because initializing site data is time-consuming. Before using
26
+ AutoPagerize for w3m, you have to invoke w3m-autopagerize-server.rb!
27
+ w3m-autopagerize-server.rb loads config file (~/.w3m-autopagerize.rb)
28
+ if any and reads AutoPagerize SITEINFO data from wedata.net by
29
+ default.
30
+
31
+ ** next.cgi
32
+ next.cgi is the Local CGI program that asks w3m-autopagerize-server.rb for the next page.
33
+
34
+ ** config.sample.rb
35
+ The sample config file to customize. See file:config.sample.rb for details.
36
+
37
+ * Installation
38
+ ** Install AutoPagerize for w3m
39
+ AutoPagerize for w3m works with Ruby 1.9 only! So, you have to install
40
+ Ruby 1.9.x. Then simply issue:
41
+
42
+ : $ sudo ruby1.9 -S gem install w3m-autopagerize
43
+
44
+ ** Copy config file
45
+ If you customize AutoPagerize for w3m, copy config.sample.rb to
46
+ ~/.w3m-autopagerize.rb and edit it.
47
+
48
+ ** Local CGI setup
49
+ Local CGI program next.cgi is installed at
50
+ /usr/local/bin/next.cgi. You have to make w3m find it. Add
51
+ /usr/local/bin to your Local CGI path (cgi_bin), or make a symlink.
52
+
53
+ : $ cd ~/w3m/cgi-bin; ln -s /usr/local/bin/next.cgi
54
+
55
+ ** Key bind
56
+ Bind AutoPagerize for w3m to your favorite key. Edit ~/.w3m/keymap and add this line.
57
+
58
+ : keymap x GOTO file:/cgi-bin/next.cgi
59
+
60
+ * Usage
61
+ - Press `x' key to go to next page.
62
+ - Press the `=' key to see information, e.g. the XPath used to get the next page.
63
+
64
+ * License
65
+ AutoPagerize for w3m is licensed under the same terms as Ruby.
@@ -0,0 +1,166 @@
1
+ #!/usr/local/bin/ruby19
2
+ # -*- coding: utf-8 -*-
3
+ # (executable-interpret "ruby19 /m/home/rubikitch/w3m/cgi-bin/w3m-autopagerize/test-w3m-autopagerize.rb --no-use-color ")
4
+ require 'fileutils'
5
+ FileUtils.rm_f "test.log"
6
+
7
+ require 'test/unit'
8
+ require 'open-uri'
9
+ require 'script'
10
+ require 'w3m-autopagerize-server' # !> method redefined; discarding old debug_with_time
11
+
12
+ $TEST_MODE = true # NOTE(review): presumably read by the server code to change behaviour under test -- confirm
12
+ $W3M_EXTRA_OPTIONS = "-o http_proxy=http://127.0.0.1:8339/" # extra w3m option string: points http_proxy at a local proxy on port 8339
13
+ $logger = Logger.new "test.log" # log to test.log, which is deleted at the top of this file
15
+ class TestAutoPagerize < Test::Unit::TestCase # Exercises Server#nextpage under several site-rule configurations
16
+ def test_hatena_success # A matching rule yields the next URL, HTML containing <base, and both XPaths
17
+ $logger.info "Test: #{__method__}"
18
+ reinit # NOTE(review): defined outside this file; presumably resets registered site rules -- confirm
19
+ defnext %r{^https?:\/\/(?:d2?|[^.]+\.g)\.hatena\.ne\.jp\/} do
20
+ exampleUrl %{http://os0x.g.hatena.ne.jp/os0x/}
21
+ pageElement %{id("days")}
22
+ nextLink %{//a[@rel="prev"]}
23
+ end
24
+
25
+ nexturl = "http://d.hatena.ne.jp/rubikitch/20090110/1231524557"
26
+ origurl = "http://d.hatena.ne.jp/rubikitch/20090113/1231844047"
27
+ np = Server.new.nextpage(origurl, nil, nil, Object.new)
28
+ assert_equal nexturl, np[:location]
29
+ assert_match(/<base/, np[:html])
30
+ assert_equal %{id("days")}, np[:pageElement]
31
+ assert_equal %{//a[@rel="prev"]}, np[:nextLink]
32
+ end
33
+
34
+ def test_hatena_fail # pageElement XPath id("noelement"): location is still expected, html must report the crop failure
35
+ $logger.info "Test: #{__method__}"
36
+ reinit
37
+ defnext %r{^https?:\/\/(?:d2?|[^.]+\.g)\.hatena\.ne\.jp\/} do
38
+ exampleUrl %{http://os0x.g.hatena.ne.jp/os0x/}
39
+ pageElement %{id("noelement")}
40
+ nextLink %{//a[@rel="prev"]}
41
+ end
42
+
43
+ nexturl = "http://d.hatena.ne.jp/rubikitch/20090110/1231524557"
44
+ origurl = "http://d.hatena.ne.jp/rubikitch/20090113/1231844047"
45
+ np = Server.new.nextpage(origurl, nil, nil, Object.new)
46
+ assert_equal nexturl, np[:location]
47
+ assert_match(/failed to crop html/, np[:html])
48
+ assert_equal %{id("noelement")}, np[:pageElement]
49
+ assert_equal %{//a[@rel="prev"]}, np[:nextLink]
50
+ end
51
+
52
+ def test_google_addstring # addstring rule: expected location is the original URL with '&start=100' appended
53
+ $logger.info "Test: #{__method__}"
54
+ reinit
55
+ addstring %r!^http://www.google.(?:co.jp|com)/search!, '&start=100'
56
+
57
+ nexturl = "http://www.google.com/search?q=ruby&hl=ja&num=100&start=100"
58
+ origurl = "http://www.google.com/search?q=ruby&hl=ja&num=100"
59
+ np = Server.new.nextpage(origurl, nil, nil, Object.new)
60
+ assert_equal nexturl, np[:location]
61
+ end
62
+
63
+ def test_google_increment # increment rule with step 100: start=100 in the URL is expected to become start=200
64
+ reinit
65
+ increment %r!^http://www.google.(?:co.jp|com)/search.*start=(\d+)!, 100
66
+
67
+ nexturl = "http://www.google.com/search?q=ruby&hl=ja&num=100&start=200"
68
+ origurl = "http://www.google.com/search?q=ruby&hl=ja&num=100&start=100"
69
+ np = Server.new.nextpage(origurl, nil, nil, Object.new)
70
+ assert_equal nexturl, np[:location]
71
+ end
72
+
73
+ def test_google_fallback_link # Only SiteData.fallbacks[0] is registered, for every URL
74
+ $logger.info "Test: #{__method__}"
75
+ reinit
76
+ $FALLBACK_PATTERNS = %w[次へ]
77
+ $FALLBACK_WORDS = %w[次へ]
78
+ $SITEINFO = [[ /./, SiteData.fallbacks[0] ]] # /./ matches any non-empty URL
79
+
80
+ nexturl = "http://www.google.com/search?num=100&hl=ja&pwst=1&q=ruby&start=100&sa=N"
81
+ origurl = "http://www.google.com/search?q=ruby&hl=ja&num=100"
82
+ np = Server.new.nextpage(origurl, nil, "UTF-8", Object.new)
83
+ assert_equal nexturl, np[:location]
84
+ end
85
+
86
+ def test_futaba_fallback_form # Only SiteData.fallbacks[1] is registered; page charset passed as cp932
87
+ $logger.info "Test: #{__method__}"
88
+ reinit
89
+ $FALLBACK_WORDS = %w[次のページ]
90
+ $SITEINFO = [[ /./, SiteData.fallbacks[1] ]]
91
+
92
+ nexturl = "http://may.2chan.net/27/1.htm"
93
+ origurl = "http://may.2chan.net/27/futaba.htm"
94
+ np = Server.new.nextpage(origurl, nil, "cp932", Object.new)
95
+ assert_equal nexturl, np[:location]
96
+ end
97
+
98
+ def test_futaba_fallback_by_wrong_sitedata # A 2chan rule using the hatena_fail XPaths is registered; the next URL is still expected
99
+ $logger.info "Test: #{__method__}"
100
+ reinit
101
+
102
+ defnext %r{2chan} do
103
+ pageElement %{id("noelement")}
104
+ nextLink %{//a[@rel="prev"]}
105
+ end
106
+
107
+ $FALLBACK_WORDS = %w[次のページ]
108
+ SiteData.fallbacks.setup! # NOTE(review): presumably installs the fallback rules after the rule above -- confirm
109
+
110
+ nexturl = "http://may.2chan.net/27/1.htm"
111
+ origurl = "http://may.2chan.net/27/futaba.htm"
112
+ np = Server.new.nextpage(origurl, nil, "cp932", Object.new)
113
+ assert_equal nexturl, np[:location]
114
+ end
115
+ end
116
+
117
+ class TestFallBackPredicate < Test::Unit::TestCase # Pins the XPath predicate string built by SiteData.fallback_predicate1
118
+ def test_1 # Both lists given: an equality term and a contains() term joined by " or "
119
+ assert_equal '.="tugi" or contains(.,"Next")',
120
+ SiteData.fallback_predicate1(".", %w[tugi], %w[Next])
121
+ end
122
+ def test_2 # Only the second argument is non-empty: a lone equality term
123
+ assert_equal '.="tugi"', SiteData.fallback_predicate1(".", %w[tugi], [])
124
+ end
125
+ def test_3 # Only the third argument is non-empty: a lone contains() term
126
+ assert_equal 'contains(.,"Next")', SiteData.fallback_predicate1(".", [], %w[Next])
127
+ end
128
+ end
129
+
130
+ class TestFallBackSiteData < Test::Unit::TestCase # Checks the list returned by SiteData.fallbacks for different $FALLBACK_* settings
131
+ def setup # Runs before each test
132
+ SiteData.instance_eval { @fallbacks = nil } # clear SiteData's @fallbacks; presumably a memoized value rebuilt on next access -- confirm
133
+ end
134
+
135
+ def test_1 # With one start word configured, four fallback entries are expected
136
+ $FALLBACK_PATTERNS = %w[次へ]
137
+ $FALLBACK_WORDS = %w[次へ]
138
+ $FALLBACK_START_WORDS = %w[tugi]
139
+ assert_equal 4, SiteData.fallbacks.length
140
+ end
141
+
142
+ def test_2 # With no start words, two fallback entries are expected
143
+ $FALLBACK_PATTERNS = %w[次へ]
144
+ $FALLBACK_WORDS = %w[次へ]
145
+ $FALLBACK_START_WORDS = []
146
+ assert_equal 2, SiteData.fallbacks.length
147
+ end
148
+
149
+ def test_3 # The returned fallbacks object must respond to setup!
150
+ $FALLBACK_PATTERNS = %w[次へ]
151
+ $FALLBACK_WORDS = %w[次へ]
152
+ $FALLBACK_START_WORDS = %w[tugi]
153
+ assert SiteData.fallbacks.respond_to? :setup!
154
+ end
155
+
156
+
157
+ end
158
+
159
+
160
+ # >> Loaded suite -
161
+ # >> Started
162
+ # >> .....
163
+ # >>
164
+ # >> Finished in 1.154570634 seconds.
165
+ # >>
166
+ # >> 5 tests, 14 assertions, 0 failures, 0 errors, 0 pendings, 0 omissions, 0 notifications
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: w3m-autopagerize
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - rubikitch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-30 00:00:00 +09:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: AutoPagerize for w3m
17
+ email: rubikitch@ruby-lang.org
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - readme.org
26
+ - readme.html
27
+ - config.sample.rb
28
+ - bin/w3m-autopagerize-server.rb
29
+ - bin/next.cgi
30
+ - test/test-w3m-autopagerize.rb
31
+ has_rdoc: false
32
+ homepage: http://www.rubyist.net/~rubikitch/
33
+ post_install_message:
34
+ rdoc_options: []
35
+
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: "0"
43
+ version:
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ requirements: []
51
+
52
+ rubyforge_project: rubikitchrb
53
+ rubygems_version: 1.3.1
54
+ signing_key:
55
+ specification_version: 2
56
+ summary: AutoPagerize for w3m
57
+ test_files: []
58
+