bwkfanboy 1.4.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +7 -0
  3. data/Gemfile.lock +51 -0
  4. data/Procfile +1 -0
  5. data/README.rdoc +40 -77
  6. data/Rakefile +13 -48
  7. data/bin/bwkfanboy +47 -166
  8. data/bin/bwkfanboy_generate +7 -19
  9. data/bin/bwkfanboy_parse +21 -17
  10. data/bwkfanboy.gemspec +40 -0
  11. data/config.ru +3 -0
  12. data/doc/NEWS.rdoc +21 -79
  13. data/doc/plugin.rdoc +63 -79
  14. data/etc/bwkfanboy.yaml +2 -0
  15. data/etc/sinatra.rb +34 -0
  16. data/lib/bwkfanboy/cliconfig.rb +141 -0
  17. data/lib/bwkfanboy/cliutils.rb +114 -0
  18. data/lib/bwkfanboy/fetch.rb +22 -24
  19. data/lib/bwkfanboy/generator.rb +78 -0
  20. data/lib/bwkfanboy/home.rb +53 -0
  21. data/lib/bwkfanboy/meta.rb +5 -2
  22. data/lib/bwkfanboy/plugin.rb +247 -0
  23. data/lib/bwkfanboy/plugin_skeleton.erb +19 -23
  24. data/lib/bwkfanboy/server.rb +73 -0
  25. data/lib/bwkfanboy/utils.rb +39 -129
  26. data/plugins/bwk.rb +25 -0
  27. data/plugins/econlib.rb +22 -0
  28. data/plugins/freebsd-ports-update.rb +73 -0
  29. data/plugins/inc.rb +29 -0
  30. data/plugins/test.rb +29 -0
  31. data/public/.gitattributes +1 -0
  32. data/public/favicon.ico +0 -0
  33. data/public/jquery-1.7.2.min.js +0 -0
  34. data/public/list.js +111 -0
  35. data/public/loading.gif +0 -0
  36. data/public/style.css +54 -0
  37. data/shotgun.rb +20 -0
  38. data/test/example/.gitattributes +1 -0
  39. data/test/example/.gitignore +1 -0
  40. data/test/example/02/plugins/bwk.html +0 -0
  41. data/test/{plugins → example/02/plugins}/empty.rb +0 -0
  42. data/test/example/02/plugins/garbage.rb +1 -0
  43. data/test/example/02/plugins/inc.html +0 -0
  44. data/test/helper.rb +30 -27
  45. data/test/helper_cliutils.rb +34 -0
  46. data/test/test_cli.rb +86 -0
  47. data/test/test_fetch.rb +49 -18
  48. data/test/test_generate.rb +43 -16
  49. data/test/test_home.rb +33 -0
  50. data/test/test_plugin.rb +141 -0
  51. data/test/test_server.rb +21 -32
  52. data/views/list.haml +38 -0
  53. metadata +223 -110
  54. data/bin/bwkfanboy_fetch +0 -13
  55. data/bin/bwkfanboy_server +0 -126
  56. data/doc/README.erb +0 -114
  57. data/doc/README.rdoc +0 -141
  58. data/doc/TODO +0 -7
  59. data/doc/bwkfanboy_fetch.rdoc +0 -4
  60. data/doc/bwkfanboy_generate.rdoc +0 -7
  61. data/doc/bwkfanboy_parse.rdoc +0 -7
  62. data/doc/bwkfanboy_server.rdoc +0 -35
  63. data/doc/rakefile.rb +0 -59
  64. data/lib/bwkfanboy/generate.rb +0 -63
  65. data/lib/bwkfanboy/parser.rb +0 -156
  66. data/lib/bwkfanboy/plugins/bwk.rb +0 -33
  67. data/lib/bwkfanboy/plugins/econlib.rb +0 -34
  68. data/lib/bwkfanboy/plugins/freebsd-ports-update.rb +0 -76
  69. data/lib/bwkfanboy/plugins/inc.rb +0 -37
  70. data/lib/bwkfanboy/schema.js +0 -39
  71. data/test/popen4.sh +0 -4
  72. data/test/rake_git.rb +0 -36
  73. data/test/semis/Rakefile +0 -35
  74. data/test/semis/bwk.html +0 -393
  75. data/test/semis/bwk.json +0 -82
  76. data/test/semis/econlib.html +0 -21
  77. data/test/semis/inc.html +0 -1067
  78. data/test/semis/links.txt +0 -4
  79. data/test/test_parse.rb +0 -27
  80. data/test/xml-clean.sh +0 -8
  81. data/web/bwkfanboy.cgi +0 -36
@@ -1,156 +0,0 @@
1
- require 'json'
2
- require 'date'
3
-
4
- require_relative 'utils'
5
-
6
- # :include: ../../doc/README.rdoc
7
- module Bwkfanboy
8
-
9
- # :include: ../../doc/plugin.rdoc
10
- class Parse
11
- ENTRIES_MAX = 128
12
-
13
- attr_reader :opt
14
-
15
- def initialize(opt = [])
16
- @entries = []
17
- @opt = opt
18
- end
19
-
20
- # Invokes #myparse & checks if it has grabbed something.
21
- def parse(stream)
22
- @entries = []
23
- begin
24
- myparse(stream)
25
- rescue
26
- @entries = []
27
- Utils.errx(1, "parsing failed: #{$!}\n\nBacktrace:\n\n#{$!.backtrace.join("\n")}")
28
- end
29
- Utils.errx(1, "plugin return no output") if @entries.length == 0
30
- end
31
-
32
- def uri()
33
- m = get_meta()
34
- eval("\"#{m::URI}\"")
35
- end
36
-
37
- # Prints entries in 'key: value' formatted strings. Intended for
38
- # debugging.
39
- def dump()
40
- @entries.each {|i|
41
- puts "title : " + i[:title]
42
- puts "link : " + i[:link]
43
- puts "updated : " + i[:updated]
44
- puts "author : " + i[:author]
45
- puts "content : " + i[:content]
46
- puts ""
47
- }
48
- end
49
-
50
- def to_json()
51
- # guess the time of the most recent entry
52
- u = DateTime.parse() # January 1, 4713 BCE
53
- @entries.each {|i|
54
- t = DateTime.parse(i[:updated])
55
- u = t if t > u
56
- }
57
-
58
- m = get_meta()
59
- uri = eval("\"#{m::URI}\"")
60
- j = {
61
- channel: {
62
- updated: u,
63
- id: uri,
64
- author: Meta::NAME, # just a placeholder
65
- title: m::TITLE,
66
- link: uri,
67
- x_entries_content_type: m::CONTENT_TYPE
68
- },
69
- x_entries: @entries
70
- }
71
- Utils::cfg[:verbose] >= 1 ? JSON.pretty_generate(j) : JSON.generate(j)
72
- end
73
-
74
- # After loading a plugin, one can do basic validation of the
75
- # plugin's class with the help of this method.
76
- def check
77
- m = get_meta()
78
- begin
79
- [:URI, :ENC, :VERSION, :COPYRIGHT, :TITLE, :CONTENT_TYPE].each {|i|
80
- fail "#{m}::#{i} not defined or empty" if (! m.const_defined?(i) || m.const_get(i) =~ /^\s*$/)
81
- }
82
-
83
- if m::URI =~ /#\{.+?\}/ && @opt.size == 0
84
- fail 'additional options required'
85
- end
86
- rescue
87
- Utils.errx(1, "incomplete plugin's instance: #{$!}")
88
- end
89
- end
90
-
91
- # Prints plugin's meta information.
92
- def dump_info()
93
- m = get_meta()
94
- puts "Version : #{m::VERSION}"
95
- puts "Copyright : #{m::COPYRIGHT}"
96
- puts "Title : #{m::TITLE}"
97
- puts "URI : #{uri}"
98
- end
99
-
100
- protected
101
-
102
- # This *must* be overridden in the child.
103
- def myparse(stream)
104
- raise "plugin isn't finished yet"
105
- end
106
-
107
- # Tries to parse _s_ as a date string. Returns the result in ISO 8601
108
- # format.
109
- def date(s)
110
- begin
111
- DateTime.parse(clean(s)).iso8601()
112
- rescue
113
- Utils.vewarnx(2, "#{s} is unparsable; date is set to current")
114
- DateTime.now().iso8601()
115
- end
116
- end
117
-
118
- # Will help you to check if there is room for more entries (limit: ENTRIES_MAX).
119
- def toobig?
120
- return true if @entries.length >= ENTRIES_MAX
121
- return false
122
- end
123
-
124
- def <<(t)
125
- if toobig? then
126
- Utils.warnx("reached max number of entries (#{ENTRIES_MAX})")
127
- return @entries
128
- end
129
-
130
- %w(updated author link).each { |i|
131
- fail "unable to extract '#{i}'" if ! t.key?(i.to_sym) || t[i.to_sym] == nil || t[i.to_sym].empty?
132
- }
133
- %w(title content).each { |i|
134
- fail "missing '#{i}'" if ! t.key?(i.to_sym) || t[i.to_sym] == nil
135
- }
136
- # a redundant check if user hasn't redefined date() method
137
- if t[:updated] !~ /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}/ then
138
- fail "'#{t[:updated]}' isn't in iso8601 format"
139
- end
140
- @entries << t
141
- end
142
-
143
- private
144
-
145
- def clean(s)
146
- s.gsub(/\s+/, ' ').strip()
147
- end
148
-
149
- def get_meta()
150
- Utils.errx(1, "incomplete plugin: no #{self.class}::Meta module") if (! defined?(self.class::Meta) || ! self.class::Meta.is_a?(Module))
151
- self.class::Meta
152
- end
153
-
154
- end # class
155
-
156
- end # module
@@ -1,33 +0,0 @@
1
- # A simple plugin that parses the listing of bwk's articles from
2
- # dailyprincetonian.com.
3
-
4
- require 'nokogiri'
5
-
6
- class Page < Bwkfanboy::Parse
7
- module Meta
8
- URI = 'http://www.dailyprincetonian.com/advanced_search/?author=Brian+Kernighan'
9
- URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/bwk.html'
10
- ENC = 'UTF-8'
11
- VERSION = 2
12
- COPYRIGHT = "See bwkfanboy's LICENSE file"
13
- TITLE = "Brian Kernighan's articles from Daily Princetonian"
14
- CONTENT_TYPE = 'html'
15
- end
16
-
17
- def myparse(stream)
18
- url = "http://www.dailyprincetonian.com"
19
-
20
- doc = Nokogiri::HTML(stream, nil, Meta::ENC)
21
- doc.xpath("//div[@class='article_item']").each {|i|
22
- t = clean(i.xpath("h2/a").children.text())
23
- fail 'unable to extract link' if (link = clean(i.xpath("h2/a")[0].attributes['href'].value()).empty?)
24
- link = clean(i.xpath("h2/a")[0].attributes['href'].value())
25
- l = url + link + "print"
26
- u = date(i.xpath("h2").children[1].text())
27
- a = clean(i.xpath("div/span/a[1]").children.text())
28
- c = clean(i.xpath("div[@class='summary']").text())
29
-
30
- self << { title: t, link: l, updated: u, author: a, content: c }
31
- }
32
- end
33
- end
@@ -1,34 +0,0 @@
1
- # This is a skeleton for a bwkfanboy 1.3.0 plugin. To understand how
2
- # plugins work please read doc/plugin.rdoc file from bwkfanboy's
3
- # distribution.
4
-
5
- require 'nokogiri'
6
-
7
- class Page < Bwkfanboy::Parse
8
- module Meta
9
- URI = 'http://www.econlib.org/cgi-bin/searcharticles.pl?sortby=DD&query=ha*'
10
- URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/econlib.html'
11
- ENC = 'UTF-8'
12
- VERSION = 1
13
- COPYRIGHT = "See bwkfanboy's LICENSE file"
14
- TITLE = "Latest articles from econlib.org"
15
- CONTENT_TYPE = 'html'
16
- end
17
-
18
- def myparse(stream)
19
- baseurl = 'http://www.econlib.org'
20
-
21
- # read 'stream' IO object and parse it
22
- doc = Nokogiri::HTML(stream, nil, Meta::ENC)
23
- doc.xpath("//*[@id='divResults']//tr").each {|i|
24
- t = clean(i.xpath("td[3]//a").text)
25
- next if t == ""
26
- l = baseurl + clean(i.xpath("td[3]//a")[0].attributes['href'].value)
27
- u = date(i.xpath("td[4]").children.text)
28
- a = clean(i.xpath("td[3]/div").children[2].text)
29
- c = clean(i.xpath("td[4]").children[2].text)
30
-
31
- self << { title: t, link: l, updated: u, author: a, content: c }
32
- }
33
- end
34
- end
@@ -1,76 +0,0 @@
1
- require 'digest/md5'
2
-
3
- class Page < Bwkfanboy::Parse
4
- module Meta
5
- URI = '/usr/ports/UPDATING'
6
- URI_DEBUG = URI
7
- ENC = 'ASCII'
8
- VERSION = 3
9
- COPYRIGHT = "See bwkfanboy's LICENSE file"
10
- TITLE = "News from FreeBSD ports"
11
- CONTENT_TYPE = 'text'
12
- end
13
-
14
- def myadd(ready, t, l, u, a, c)
15
- return true if ! ready
16
- return false if toobig?
17
- self << { title: t, link: l, updated: u, author: a, content: c.rstrip } if ready
18
- return true
19
- end
20
-
21
- def clean(t)
22
- t = t[2..-1] if t[0] != "\t"
23
- return '' if t == nil
24
- return t
25
- end
26
-
27
- def myparse(stream)
28
- re_u = /^(\d{8}):$/
29
- re_t1 = /^ {2}AFFECTS:\s+(.+)$/
30
- re_t2 = /^\s+(.+)$/
31
- re_a = /^ {2}AUTHORS?:\s+(.+)$/
32
-
33
- ready = false
34
- mode = nil
35
- t = l = u = a = c = nil
36
- while line = stream.gets
37
- line.rstrip!
38
-
39
- if line =~ re_u then
40
- # add a new entry
41
- break if ! myadd(ready, t, l, u, a, c)
42
- ready = true
43
- u = date($1)
44
- l = $1 # partial, see below
45
- t = a = c = nil
46
- next
47
- end
48
-
49
- if ready then
50
- if line =~ re_t1 then
51
- mode = 'title'
52
- t = $1
53
- c = clean($&) + "\n"
54
- # link should be unique
55
- l = "file://#{Meta::URI}\##{l}-#{Digest::MD5.hexdigest($1)}"
56
- elsif line =~ re_a
57
- mode = 'author'
58
- a = $1
59
- c += clean($&) + "\n"
60
- elsif line =~ re_t2 && mode == 'title'
61
- t += ' ' + $1
62
- c += clean($&) + "\n"
63
- else
64
- # content
65
- c += clean(line) + "\n"
66
- mode = nil
67
- end
68
- end
69
-
70
- # skipping the preamble
71
- end
72
-
73
- # add last entry
74
- myadd(ready, t, l, u, a, c)
75
- end
76
- end
@@ -1,37 +0,0 @@
1
- # Requires 1 option: an author's name, for example 'jason-fried'.
2
-
3
- require 'nokogiri'
4
-
5
- class Page < Bwkfanboy::Parse
6
- module Meta
7
- URI = 'http://www.inc.com/author/#{opt[0]}'
8
- URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/inc.html'
9
- ENC = 'UTF-8'
10
- VERSION = 1
11
- COPYRIGHT = 'See bwkfanboy\'s LICENSE file'
12
- TITLE = "Articles (per-user) from inc.com"
13
- CONTENT_TYPE = 'html'
14
- end
15
-
16
- def myparse(stream)
17
- profile = opt[0]
18
-
19
- # read 'stream' IO object and parse it
20
- doc = Nokogiri::HTML(stream, nil, Meta::ENC)
21
- doc.xpath("//div[@id='articleriver']/div/div").each {|i|
22
- t = clean(i.xpath("h3").text)
23
- l = clean(i.xpath("h3/a")[0].attributes['href'].value)
24
-
25
- next if (u = i.xpath("div[@class='byline']/span")).size == 0
26
- u = date(u.text)
27
-
28
- a = clean(i.xpath("div[@class='byline']/a").text)
29
-
30
- c = i.xpath("p[@class='summary']")
31
- c.xpath("a").remove
32
- c = c.inner_html(encoding: Meta::ENC)
33
-
34
- self << { title: t, link: l, updated: u, author: a, content: c }
35
- }
36
- end
37
- end
@@ -1,39 +0,0 @@
1
- {
2
- "type": "object",
3
- "properties": {
4
- "channel": {
5
- "type": "object",
6
- "properties": {
7
- "updated": {
8
- "type": "string",
9
- "format": "date-time"
10
- },
11
- "id": { "type": "string" },
12
- "author": { "type": "string" },
13
- "title": { "type": "string" },
14
- "link": { "type": "string" },
15
- "x_entries_content_type": {
16
- "type": "string",
17
- "enum": ["text", "html", "xhtml"]
18
- }
19
- }
20
- },
21
- "x_entries": {
22
- "type": "array",
23
- "minItems": 1,
24
- "items": {
25
- "type": "object",
26
- "properties": {
27
- "title": { "type": "string" },
28
- "link": { "type": "string" },
29
- "updated": {
30
- "type": "string",
31
- "format": "date-time"
32
- },
33
- "author": { "type": "string" },
34
- "content": { "type": "string" }
35
- }
36
- }
37
- }
38
- }
39
- }
@@ -1,4 +0,0 @@
1
- #!/bin/sh
2
- echo this is stdin
3
- echo this is stderr 1>&2
4
- exit 32
@@ -1,36 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # -*-ruby-*-
3
- # :erb:
4
-
5
- # This is a helper for your Rakefile. Read the comments for each
6
- # function.
7
-
8
- require 'git'
9
- require 'pp'
10
-
11
- # Return a list of files in a git repository _repdir_.
12
- #
13
- # Add this to your gem spec:
14
- #
15
- # spec = Gem::Specification.new {|i|
16
- # i.files = git_ls('.')
17
- # }
18
- #
19
- # All it does is collect the list of files from the git
20
- # repository. The idea is to use that list for the gem spec. No more
21
- # missing or redundant files in gems!
22
- def git_ls(repdir, ignore_some = true)
23
- ignore = ['/?\.gitignore$']
24
-
25
- r = []
26
- g = Git.open repdir
27
- g.ls_files.each {|i, v|
28
- next if ignore_some && ignore.index {|ign| i.match(/#{ign}/) }
29
- r << i
30
- }
31
- r
32
- end
33
-
34
- pp git_ls('.') if __FILE__ == $0
35
-
36
- # Don't remove this: falsework/0.2.2/naive/2010-12-26T04:50:00+02:00
@@ -1,35 +0,0 @@
1
- # -*-ruby-*-
2
-
3
- require 'open-uri'
4
-
5
- LINKS = 'links.txt'
6
-
7
- desc "fetch pages found in #{LINKS} file; (filter with 't=page')"
8
- task :default do
9
- if ENV['t'] =~ /^\s*$/ || !ENV.key?('t') then ENV['t'] = '.*' end
10
-
11
- stng = false
12
- File.open(LINKS) { |fp|
13
- n = 0
14
- while line = fp.gets
15
- n += 1
16
- next if (line =~ /^\s*#/ || line =~ /^\s*$/)
17
- a = line.split
18
- fail "invalid line #{n}" if a.length != 2
19
- next if (a[0] !~ Regexp::new(ENV['t']))
20
-
21
- rm(a[0], force: true)
22
- printf "fetching '#{a[1]}'... "
23
- open(a[1]) {|remote|
24
- open(a[0], 'w+') {|out| out.puts remote.read }
25
- stng = true
26
- }
27
- puts 'OK'
28
- end
29
- }
30
-
31
- if !stng
32
- STDERR.puts 'No matching fetch targets found.'
33
- exit 1
34
- end
35
- end