bwkfanboy 1.4.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +7 -0
  3. data/Gemfile.lock +51 -0
  4. data/Procfile +1 -0
  5. data/README.rdoc +40 -77
  6. data/Rakefile +13 -48
  7. data/bin/bwkfanboy +47 -166
  8. data/bin/bwkfanboy_generate +7 -19
  9. data/bin/bwkfanboy_parse +21 -17
  10. data/bwkfanboy.gemspec +40 -0
  11. data/config.ru +3 -0
  12. data/doc/NEWS.rdoc +21 -79
  13. data/doc/plugin.rdoc +63 -79
  14. data/etc/bwkfanboy.yaml +2 -0
  15. data/etc/sinatra.rb +34 -0
  16. data/lib/bwkfanboy/cliconfig.rb +141 -0
  17. data/lib/bwkfanboy/cliutils.rb +114 -0
  18. data/lib/bwkfanboy/fetch.rb +22 -24
  19. data/lib/bwkfanboy/generator.rb +78 -0
  20. data/lib/bwkfanboy/home.rb +53 -0
  21. data/lib/bwkfanboy/meta.rb +5 -2
  22. data/lib/bwkfanboy/plugin.rb +247 -0
  23. data/lib/bwkfanboy/plugin_skeleton.erb +19 -23
  24. data/lib/bwkfanboy/server.rb +73 -0
  25. data/lib/bwkfanboy/utils.rb +39 -129
  26. data/plugins/bwk.rb +25 -0
  27. data/plugins/econlib.rb +22 -0
  28. data/plugins/freebsd-ports-update.rb +73 -0
  29. data/plugins/inc.rb +29 -0
  30. data/plugins/test.rb +29 -0
  31. data/public/.gitattributes +1 -0
  32. data/public/favicon.ico +0 -0
  33. data/public/jquery-1.7.2.min.js +0 -0
  34. data/public/list.js +111 -0
  35. data/public/loading.gif +0 -0
  36. data/public/style.css +54 -0
  37. data/shotgun.rb +20 -0
  38. data/test/example/.gitattributes +1 -0
  39. data/test/example/.gitignore +1 -0
  40. data/test/example/02/plugins/bwk.html +0 -0
  41. data/test/{plugins → example/02/plugins}/empty.rb +0 -0
  42. data/test/example/02/plugins/garbage.rb +1 -0
  43. data/test/example/02/plugins/inc.html +0 -0
  44. data/test/helper.rb +30 -27
  45. data/test/helper_cliutils.rb +34 -0
  46. data/test/test_cli.rb +86 -0
  47. data/test/test_fetch.rb +49 -18
  48. data/test/test_generate.rb +43 -16
  49. data/test/test_home.rb +33 -0
  50. data/test/test_plugin.rb +141 -0
  51. data/test/test_server.rb +21 -32
  52. data/views/list.haml +38 -0
  53. metadata +223 -110
  54. data/bin/bwkfanboy_fetch +0 -13
  55. data/bin/bwkfanboy_server +0 -126
  56. data/doc/README.erb +0 -114
  57. data/doc/README.rdoc +0 -141
  58. data/doc/TODO +0 -7
  59. data/doc/bwkfanboy_fetch.rdoc +0 -4
  60. data/doc/bwkfanboy_generate.rdoc +0 -7
  61. data/doc/bwkfanboy_parse.rdoc +0 -7
  62. data/doc/bwkfanboy_server.rdoc +0 -35
  63. data/doc/rakefile.rb +0 -59
  64. data/lib/bwkfanboy/generate.rb +0 -63
  65. data/lib/bwkfanboy/parser.rb +0 -156
  66. data/lib/bwkfanboy/plugins/bwk.rb +0 -33
  67. data/lib/bwkfanboy/plugins/econlib.rb +0 -34
  68. data/lib/bwkfanboy/plugins/freebsd-ports-update.rb +0 -76
  69. data/lib/bwkfanboy/plugins/inc.rb +0 -37
  70. data/lib/bwkfanboy/schema.js +0 -39
  71. data/test/popen4.sh +0 -4
  72. data/test/rake_git.rb +0 -36
  73. data/test/semis/Rakefile +0 -35
  74. data/test/semis/bwk.html +0 -393
  75. data/test/semis/bwk.json +0 -82
  76. data/test/semis/econlib.html +0 -21
  77. data/test/semis/inc.html +0 -1067
  78. data/test/semis/links.txt +0 -4
  79. data/test/test_parse.rb +0 -27
  80. data/test/xml-clean.sh +0 -8
  81. data/web/bwkfanboy.cgi +0 -36
@@ -1,156 +0,0 @@
1
- require 'json'
2
- require 'date'
3
-
4
- require_relative 'utils'
5
-
6
- # :include: ../../doc/README.rdoc
7
- module Bwkfanboy
8
-
9
- # :include: ../../doc/plugin.rdoc
10
- class Parse
11
- ENTRIES_MAX = 128
12
-
13
- attr_reader :opt
14
-
15
- def initialize(opt = [])
16
- @entries = []
17
- @opt = opt
18
- end
19
-
20
- # Invokes #myparse & checks if it has grabbed something.
21
- def parse(stream)
22
- @entries = []
23
- begin
24
- myparse(stream)
25
- rescue
26
- @entries = []
27
- Utils.errx(1, "parsing failed: #{$!}\n\nBacktrace:\n\n#{$!.backtrace.join("\n")}")
28
- end
29
- Utils.errx(1, "plugin return no output") if @entries.length == 0
30
- end
31
-
32
- def uri()
33
- m = get_meta()
34
- eval("\"#{m::URI}\"")
35
- end
36
-
37
- # Prints entries in 'key: value' formatted strings. Intended for
38
- # debugging.
39
- def dump()
40
- @entries.each {|i|
41
- puts "title : " + i[:title]
42
- puts "link : " + i[:link]
43
- puts "updated : " + i[:updated]
44
- puts "author : " + i[:author]
45
- puts "content : " + i[:content]
46
- puts ""
47
- }
48
- end
49
-
50
- def to_json()
51
- # guess the time of the most recent entry
52
- u = DateTime.parse() # January 1, 4713 BCE
53
- @entries.each {|i|
54
- t = DateTime.parse(i[:updated])
55
- u = t if t > u
56
- }
57
-
58
- m = get_meta()
59
- uri = eval("\"#{m::URI}\"")
60
- j = {
61
- channel: {
62
- updated: u,
63
- id: uri,
64
- author: Meta::NAME, # just a placeholder
65
- title: m::TITLE,
66
- link: uri,
67
- x_entries_content_type: m::CONTENT_TYPE
68
- },
69
- x_entries: @entries
70
- }
71
- Utils::cfg[:verbose] >= 1 ? JSON.pretty_generate(j) : JSON.generate(j)
72
- end
73
-
74
- # After loading a plugin, one can do basic validation of the
75
- # plugin's class with the help of this method.
76
- def check
77
- m = get_meta()
78
- begin
79
- [:URI, :ENC, :VERSION, :COPYRIGHT, :TITLE, :CONTENT_TYPE].each {|i|
80
- fail "#{m}::#{i} not defined or empty" if (! m.const_defined?(i) || m.const_get(i) =~ /^\s*$/)
81
- }
82
-
83
- if m::URI =~ /#\{.+?\}/ && @opt.size == 0
84
- fail 'additional options required'
85
- end
86
- rescue
87
- Utils.errx(1, "incomplete plugin's instance: #{$!}")
88
- end
89
- end
90
-
91
- # Prints plugin's meta information.
92
- def dump_info()
93
- m = get_meta()
94
- puts "Version : #{m::VERSION}"
95
- puts "Copyright : #{m::COPYRIGHT}"
96
- puts "Title : #{m::TITLE}"
97
- puts "URI : #{uri}"
98
- end
99
-
100
- protected
101
-
102
- # This *must* be overridden in the child.
103
- def myparse(stream)
104
- raise "plugin isn't finished yet"
105
- end
106
-
107
- # Tries to parse _s_ as a date string. Returns the result in ISO 8601
108
- # format.
109
- def date(s)
110
- begin
111
- DateTime.parse(clean(s)).iso8601()
112
- rescue
113
- Utils.vewarnx(2, "#{s} is unparsable; date is set to current")
114
- DateTime.now().iso8601()
115
- end
116
- end
117
-
118
- # Tells you whether the number of collected entries has reached ENTRIES_MAX.
119
- def toobig?
120
- return true if @entries.length >= ENTRIES_MAX
121
- return false
122
- end
123
-
124
- def <<(t)
125
- if toobig? then
126
- Utils.warnx("reached max number of entries (#{ENTRIES_MAX})")
127
- return @entries
128
- end
129
-
130
- %w(updated author link).each { |i|
131
- fail "unable to extract '#{i}'" if ! t.key?(i.to_sym) || t[i.to_sym] == nil || t[i.to_sym].empty?
132
- }
133
- %w(title content).each { |i|
134
- fail "missing '#{i}'" if ! t.key?(i.to_sym) || t[i.to_sym] == nil
135
- }
136
- # a redundant check if user hasn't redefined date() method
137
- if t[:updated] !~ /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/ then
138
- fail "'#{t[:updated]}' isn't in iso8601 format"
139
- end
140
- @entries << t
141
- end
142
-
143
- private
144
-
145
- def clean(s)
146
- s.gsub(/\s+/, ' ').strip()
147
- end
148
-
149
- def get_meta()
150
- Utils.errx(1, "incomplete plugin: no #{self.class}::Meta module") if (! defined?(self.class::Meta) || ! self.class::Meta.is_a?(Module))
151
- self.class::Meta
152
- end
153
-
154
- end # class
155
-
156
- end # module
@@ -1,33 +0,0 @@
1
- # A simple plugin that parses the listing of bwk's articles from
2
- # dailyprincetonian.com.
3
-
4
- require 'nokogiri'
5
-
6
- class Page < Bwkfanboy::Parse
7
- module Meta
8
- URI = 'http://www.dailyprincetonian.com/advanced_search/?author=Brian+Kernighan'
9
- URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/bwk.html'
10
- ENC = 'UTF-8'
11
- VERSION = 2
12
- COPYRIGHT = "See bwkfanboy's LICENSE file"
13
- TITLE = "Brian Kernighan's articles from Daily Princetonian"
14
- CONTENT_TYPE = 'html'
15
- end
16
-
17
- def myparse(stream)
18
- url = "http://www.dailyprincetonian.com"
19
-
20
- doc = Nokogiri::HTML(stream, nil, Meta::ENC)
21
- doc.xpath("//div[@class='article_item']").each {|i|
22
- t = clean(i.xpath("h2/a").children.text())
23
- fail 'unable to extract link' if (link = clean(i.xpath("h2/a")[0].attributes['href'].value()).empty?)
24
- link = clean(i.xpath("h2/a")[0].attributes['href'].value())
25
- l = url + link + "print"
26
- u = date(i.xpath("h2").children[1].text())
27
- a = clean(i.xpath("div/span/a[1]").children.text())
28
- c = clean(i.xpath("div[@class='summary']").text())
29
-
30
- self << { title: t, link: l, updated: u, author: a, content: c }
31
- }
32
- end
33
- end
@@ -1,34 +0,0 @@
1
- # This is a skeleton for a bwkfanboy 1.3.0 plugin. To understand how
2
- # plugins work, please read the doc/plugin.rdoc file from bwkfanboy's
3
- # distribution.
4
-
5
- require 'nokogiri'
6
-
7
- class Page < Bwkfanboy::Parse
8
- module Meta
9
- URI = 'http://www.econlib.org/cgi-bin/searcharticles.pl?sortby=DD&query=ha*'
10
- URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/econlib.html'
11
- ENC = 'UTF-8'
12
- VERSION = 1
13
- COPYRIGHT = "See bwkfanboy's LICENSE file"
14
- TITLE = "Latest articles from econlib.org"
15
- CONTENT_TYPE = 'html'
16
- end
17
-
18
- def myparse(stream)
19
- baseurl = 'http://www.econlib.org'
20
-
21
- # read 'stream' IO object and parse it
22
- doc = Nokogiri::HTML(stream, nil, Meta::ENC)
23
- doc.xpath("//*[@id='divResults']//tr").each {|i|
24
- t = clean(i.xpath("td[3]//a").text)
25
- next if t == ""
26
- l = baseurl + clean(i.xpath("td[3]//a")[0].attributes['href'].value)
27
- u = date(i.xpath("td[4]").children.text)
28
- a = clean(i.xpath("td[3]/div").children[2].text)
29
- c = clean(i.xpath("td[4]").children[2].text)
30
-
31
- self << { title: t, link: l, updated: u, author: a, content: c }
32
- }
33
- end
34
- end
@@ -1,76 +0,0 @@
1
- require 'digest/md5'
2
-
3
- class Page < Bwkfanboy::Parse
4
- module Meta
5
- URI = '/usr/ports/UPDATING'
6
- URI_DEBUG = URI
7
- ENC = 'ASCII'
8
- VERSION = 3
9
- COPYRIGHT = "See bwkfanboy's LICENSE file"
10
- TITLE = "News from FreeBSD ports"
11
- CONTENT_TYPE = 'text'
12
- end
13
-
14
- def myadd(ready, t, l, u, a, c)
15
- return true if ! ready
16
- return false if toobig?
17
- self << { title: t, link: l, updated: u, author: a, content: c.rstrip } if ready
18
- return true
19
- end
20
-
21
- def clean(t)
22
- t = t[2..-1] if t[0] != "\t"
23
- return '' if t == nil
24
- return t
25
- end
26
-
27
- def myparse(stream)
28
- re_u = /^(\d{8}):$/
29
- re_t1 = /^ {2}AFFECTS:\s+(.+)$/
30
- re_t2 = /^\s+(.+)$/
31
- re_a = /^ {2}AUTHORS?:\s+(.+)$/
32
-
33
- ready = false
34
- mode = nil
35
- t = l = u = a = c = nil
36
- while line = stream.gets
37
- line.rstrip!
38
-
39
- if line =~ re_u then
40
- # add a new entry
41
- break if ! myadd(ready, t, l, u, a, c)
42
- ready = true
43
- u = date($1)
44
- l = $1 # partial, see below
45
- t = a = c = nil
46
- next
47
- end
48
-
49
- if ready then
50
- if line =~ re_t1 then
51
- mode = 'title'
52
- t = $1
53
- c = clean($&) + "\n"
54
- # link should be unique
55
- l = "file://#{Meta::URI}\##{l}-#{Digest::MD5.hexdigest($1)}"
56
- elsif line =~ re_a
57
- mode = 'author'
58
- a = $1
59
- c += clean($&) + "\n"
60
- elsif line =~ re_t2 && mode == 'title'
61
- t += ' ' + $1
62
- c += clean($&) + "\n"
63
- else
64
- # content
65
- c += clean(line) + "\n"
66
- mode = nil
67
- end
68
- end
69
-
70
- # skipping the preamble
71
- end
72
-
73
- # add last entry
74
- myadd(ready, t, l, u, a, c)
75
- end
76
- end
@@ -1,37 +0,0 @@
1
- # Requires 1 option: an author's name, for example 'jason-fried'.
2
-
3
- require 'nokogiri'
4
-
5
- class Page < Bwkfanboy::Parse
6
- module Meta
7
- URI = 'http://www.inc.com/author/#{opt[0]}'
8
- URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/inc.html'
9
- ENC = 'UTF-8'
10
- VERSION = 1
11
- COPYRIGHT = 'See bwkfanboy\'s LICENSE file'
12
- TITLE = "Articles (per-user) from inc.com"
13
- CONTENT_TYPE = 'html'
14
- end
15
-
16
- def myparse(stream)
17
- profile = opt[0]
18
-
19
- # read 'stream' IO object and parse it
20
- doc = Nokogiri::HTML(stream, nil, Meta::ENC)
21
- doc.xpath("//div[@id='articleriver']/div/div").each {|i|
22
- t = clean(i.xpath("h3").text)
23
- l = clean(i.xpath("h3/a")[0].attributes['href'].value)
24
-
25
- next if (u = i.xpath("div[@class='byline']/span")).size == 0
26
- u = date(u.text)
27
-
28
- a = clean(i.xpath("div[@class='byline']/a").text)
29
-
30
- c = i.xpath("p[@class='summary']")
31
- c.xpath("a").remove
32
- c = c.inner_html(encoding: Meta::ENC)
33
-
34
- self << { title: t, link: l, updated: u, author: a, content: c }
35
- }
36
- end
37
- end
@@ -1,39 +0,0 @@
1
- {
2
- "type": "object",
3
- "properties": {
4
- "channel": {
5
- "type": "object",
6
- "properties": {
7
- "updated": {
8
- "type": "string",
9
- "format": "date-time"
10
- },
11
- "id": { "type": "string" },
12
- "author": { "type": "string" },
13
- "title": { "type": "string" },
14
- "link": { "type": "string" },
15
- "x_entries_content_type": {
16
- "type": "string",
17
- "enum": ["text", "html", "xhtml"]
18
- }
19
- }
20
- },
21
- "x_entries": {
22
- "type": "array",
23
- "minItems": 1,
24
- "items": {
25
- "type": "object",
26
- "properties": {
27
- "title": { "type": "string" },
28
- "link": { "type": "string" },
29
- "updated": {
30
- "type": "string",
31
- "format": "date-time"
32
- },
33
- "author": { "type": "string" },
34
- "content": { "type": "string" }
35
- }
36
- }
37
- }
38
- }
39
- }
@@ -1,4 +0,0 @@
1
- #!/bin/sh
2
- echo this is stdin
3
- echo this is stderr 1>&2
4
- exit 32
@@ -1,36 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # -*-ruby-*-
3
- # :erb:
4
-
5
- # This is a helper for your Rakefile. Read the comments for each
6
- # function.
7
-
8
- require 'git'
9
- require 'pp'
10
-
11
- # Return a list of files in a git repository _repdir_.
12
- #
13
- # Add this to your gem spec:
14
- #
15
- # spec = Gem::Specification.new {|i|
16
- # i.files = git_ls('.')
17
- # }
18
- #
19
- # It simply collects the list of files from the git
20
- # repository. The idea is to use that list for the gem spec. No more
21
- # missing or redundant files in gems!
22
- def git_ls(repdir, ignore_some = true)
23
- ignore = ['/?\.gitignore$']
24
-
25
- r = []
26
- g = Git.open repdir
27
- g.ls_files.each {|i, v|
28
- next if ignore_some && ignore.index {|ign| i.match(/#{ign}/) }
29
- r << i
30
- }
31
- r
32
- end
33
-
34
- pp git_ls('.') if __FILE__ == $0
35
-
36
- # Don't remove this: falsework/0.2.2/naive/2010-12-26T04:50:00+02:00
@@ -1,35 +0,0 @@
1
- # -*-ruby-*-
2
-
3
- require 'open-uri'
4
-
5
- LINKS = 'links.txt'
6
-
7
- desc "fetch pages found in #{LINKS} file; (filter with 't=page')"
8
- task :default do
9
- if ENV['t'] =~ /^\s*$/ || !ENV.key?('t') then ENV['t'] = '.*' end
10
-
11
- stng = false
12
- File.open(LINKS) { |fp|
13
- n = 0
14
- while line = fp.gets
15
- n += 1
16
- next if (line =~ /^\s*#/ || line =~ /^\s*$/)
17
- a = line.split
18
- fail "invalid line #{n}" if a.length != 2
19
- next if (a[0] !~ Regexp::new(ENV['t']))
20
-
21
- rm(a[0], force: true)
22
- printf "fetching '#{a[1]}'... "
23
- open(a[1]) {|remote|
24
- open(a[0], 'w+') {|out| out.puts remote.read }
25
- stng = true
26
- }
27
- puts 'OK'
28
- end
29
- }
30
-
31
- if !stng
32
- STDERR.puts 'No matching fetch targets found.'
33
- exit 1
34
- end
35
- end