bwkfanboy 1.4.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +7 -0
  3. data/Gemfile.lock +51 -0
  4. data/Procfile +1 -0
  5. data/README.rdoc +40 -77
  6. data/Rakefile +13 -48
  7. data/bin/bwkfanboy +47 -166
  8. data/bin/bwkfanboy_generate +7 -19
  9. data/bin/bwkfanboy_parse +21 -17
  10. data/bwkfanboy.gemspec +40 -0
  11. data/config.ru +3 -0
  12. data/doc/NEWS.rdoc +21 -79
  13. data/doc/plugin.rdoc +63 -79
  14. data/etc/bwkfanboy.yaml +2 -0
  15. data/etc/sinatra.rb +34 -0
  16. data/lib/bwkfanboy/cliconfig.rb +141 -0
  17. data/lib/bwkfanboy/cliutils.rb +114 -0
  18. data/lib/bwkfanboy/fetch.rb +22 -24
  19. data/lib/bwkfanboy/generator.rb +78 -0
  20. data/lib/bwkfanboy/home.rb +53 -0
  21. data/lib/bwkfanboy/meta.rb +5 -2
  22. data/lib/bwkfanboy/plugin.rb +247 -0
  23. data/lib/bwkfanboy/plugin_skeleton.erb +19 -23
  24. data/lib/bwkfanboy/server.rb +73 -0
  25. data/lib/bwkfanboy/utils.rb +39 -129
  26. data/plugins/bwk.rb +25 -0
  27. data/plugins/econlib.rb +22 -0
  28. data/plugins/freebsd-ports-update.rb +73 -0
  29. data/plugins/inc.rb +29 -0
  30. data/plugins/test.rb +29 -0
  31. data/public/.gitattributes +1 -0
  32. data/public/favicon.ico +0 -0
  33. data/public/jquery-1.7.2.min.js +0 -0
  34. data/public/list.js +111 -0
  35. data/public/loading.gif +0 -0
  36. data/public/style.css +54 -0
  37. data/shotgun.rb +20 -0
  38. data/test/example/.gitattributes +1 -0
  39. data/test/example/.gitignore +1 -0
  40. data/test/example/02/plugins/bwk.html +0 -0
  41. data/test/{plugins → example/02/plugins}/empty.rb +0 -0
  42. data/test/example/02/plugins/garbage.rb +1 -0
  43. data/test/example/02/plugins/inc.html +0 -0
  44. data/test/helper.rb +30 -27
  45. data/test/helper_cliutils.rb +34 -0
  46. data/test/test_cli.rb +86 -0
  47. data/test/test_fetch.rb +49 -18
  48. data/test/test_generate.rb +43 -16
  49. data/test/test_home.rb +33 -0
  50. data/test/test_plugin.rb +141 -0
  51. data/test/test_server.rb +21 -32
  52. data/views/list.haml +38 -0
  53. metadata +223 -110
  54. data/bin/bwkfanboy_fetch +0 -13
  55. data/bin/bwkfanboy_server +0 -126
  56. data/doc/README.erb +0 -114
  57. data/doc/README.rdoc +0 -141
  58. data/doc/TODO +0 -7
  59. data/doc/bwkfanboy_fetch.rdoc +0 -4
  60. data/doc/bwkfanboy_generate.rdoc +0 -7
  61. data/doc/bwkfanboy_parse.rdoc +0 -7
  62. data/doc/bwkfanboy_server.rdoc +0 -35
  63. data/doc/rakefile.rb +0 -59
  64. data/lib/bwkfanboy/generate.rb +0 -63
  65. data/lib/bwkfanboy/parser.rb +0 -156
  66. data/lib/bwkfanboy/plugins/bwk.rb +0 -33
  67. data/lib/bwkfanboy/plugins/econlib.rb +0 -34
  68. data/lib/bwkfanboy/plugins/freebsd-ports-update.rb +0 -76
  69. data/lib/bwkfanboy/plugins/inc.rb +0 -37
  70. data/lib/bwkfanboy/schema.js +0 -39
  71. data/test/popen4.sh +0 -4
  72. data/test/rake_git.rb +0 -36
  73. data/test/semis/Rakefile +0 -35
  74. data/test/semis/bwk.html +0 -393
  75. data/test/semis/bwk.json +0 -82
  76. data/test/semis/econlib.html +0 -21
  77. data/test/semis/inc.html +0 -1067
  78. data/test/semis/links.txt +0 -4
  79. data/test/test_parse.rb +0 -27
  80. data/test/xml-clean.sh +0 -8
  81. data/web/bwkfanboy.cgi +0 -36
@@ -1,25 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
- # -*-ruby-*-
3
2
 
4
- require_relative '../lib/bwkfanboy/generate'
3
+ require_relative '../lib/bwkfanboy/cliutils'
4
+ require_relative '../lib/bwkfanboy/generator'
5
+ include Bwkfanboy
5
6
 
6
- $conf = {
7
- banner: "Usage: #{File.basename($0)} [options] < json",
8
- check: false
9
- }
10
-
11
- # we are expection the input ONLY in UTF-8
12
- Encoding.default_external = 'UTF-8'
13
-
14
- o = Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner])
15
- o.on('--check', 'Validate the input (slow!)') { |i| $conf[:check] = true }
16
- Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], o) # run cl parser
7
+ ### main
17
8
 
18
9
  begin
19
- j = JSON.parse(STDIN.read)
20
- rescue
21
- Bwkfanboy::Utils.errx(1, "stdin had invalid JSON");
10
+ puts Generator.atom(Generator.unpack $stdin)
11
+ rescue GeneratorException
12
+ CliUtils.errx EX_DATAERR, $!.to_s
22
13
  end
23
-
24
- if $conf[:check] then Bwkfanboy::Generate.validate(j) end
25
- puts Bwkfanboy::Generate.atom(j)
@@ -1,27 +1,31 @@
1
1
  #!/usr/bin/env ruby
2
- # -*-ruby-*-
3
2
 
4
- require_relative '../lib/bwkfanboy/parser'
3
+ require_relative '../lib/bwkfanboy/home'
4
+ require_relative '../lib/bwkfanboy/plugin'
5
+ include Bwkfanboy
5
6
 
6
- $conf = {
7
- banner: "Usage: #{File.basename($0)} [options] /path/to/my/plugin.rb < html"
7
+ ### main
8
+
9
+ home = Home.new {|loader, o|
10
+ loader.conf[:banner] = "#{File.basename $0} [options] plugin [opt1 opt2 ...] < html"
8
11
  }
9
12
 
10
- Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], nil, true)
13
+ CliUtils.errx EX_USAGE, home.conf[:banner] unless BH.all_set?(ARGV[0])
11
14
 
12
- if ARGV.size == 0 then
13
- abort($conf[:banner])
14
- else
15
- Bwkfanboy::Utils.plugin_load(ARGV[0], Bwkfanboy::Meta::PLUGIN_CLASS)
15
+ # load plugin
16
+ stream = [$stdin]
17
+ begin
18
+ p = Plugin.new home.conf[:plugins_path], ARGV[0], ARGV[1..-1] do |loader|
19
+ CliUtils.veputs 2, "'#{ARGV[0]}' plugin loaded from #{loader.origin}"
20
+ end
21
+ p.run_parser stream
22
+ rescue PluginException
23
+ CliUtils.errx EX_DATAERR, $!.to_s
16
24
  end
17
25
 
18
- opt = Bwkfanboy::Utils.plugin_opts(ARGV)
19
- pn = Page.new(opt)
20
- pn.check()
21
- pn.parse(STDIN)
22
-
23
- if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
24
- pn.dump()
26
+ # print output
27
+ if home.conf[:verbose] >= 1
28
+ pp p
25
29
  else
26
- puts pn.to_json()
30
+ p.pack $stdout
27
31
  end
@@ -0,0 +1,40 @@
1
+ # -*-ruby-*-
2
+ require File.expand_path('../lib/bwkfanboy/meta', __FILE__)
3
+ include Bwkfanboy
4
+
5
+ Gem::Specification.new do |gem|
6
+ gem.authors = [Meta::NAME]
7
+ gem.email = [Meta::EMAIL]
8
+ gem.description = 'A converter from a raw HTML to an Atom feed. You can use it to watch sites that do not provide its own feed'
9
+ gem.summary = gem.description + '.'
10
+ gem.homepage = Meta::HOMEPAGE
11
+
12
+ gem.files = `git ls-files`.split($\)
13
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
14
+ gem.test_files = gem.files.grep(%r{^test/test_.+\.rb})
15
+ gem.name = Meta::NAME
16
+ gem.version = Meta::VERSION
17
+
18
+ gem.required_ruby_version = '>= 1.9.2'
19
+ gem.extra_rdoc_files = gem.files.grep(%r{^doc/})
20
+ gem.rdoc_options << '-m' << 'README.rdoc'
21
+
22
+ gem.post_install_message = <<-MESSAGE
23
+ This version was rewritten from scratch.
24
+ Plugins from & for previous versions (0.x & 1.x) WILL NOT WORK.
25
+
26
+ See also doc/NEWS.rdoc file.
27
+ MESSAGE
28
+
29
+ gem.add_dependency "open4", "~> 1.3.0"
30
+ gem.add_dependency "msgpack", "~> 0.4.6"
31
+ gem.add_dependency "rake", "~> 0.9.2.2"
32
+ gem.add_dependency "nokogiri", "~> 1.5.2"
33
+ gem.add_dependency "sinatra", "~> 1.3.2"
34
+ gem.add_dependency "haml", "~> 3.1.4"
35
+ gem.add_dependency "rdoc", "~> 3.12"
36
+
37
+ gem.add_development_dependency "minitest", "~> 2.12.1"
38
+ gem.add_development_dependency "fakefs", "~> 0.4.0"
39
+ gem.add_development_dependency "rack-test", "~> 0.6.1"
40
+ end
@@ -0,0 +1,3 @@
1
+ require './lib/bwkfanboy/server'
2
+ run Bwkfanboy::MyApp
3
+
@@ -1,92 +1,34 @@
1
- === 1.4.1
1
+ === 2.0.0
2
2
 
3
- Wed Apr 20 21:12:16 EEST 2011
3
+ Mon Apr 23 07:28:52 EEST 2012
4
4
 
5
- - Repackage under RVM due to a strange bug (in native FreeBSD ruby +
6
- rubygems 1.7.2) with a date in the generated gem.
5
+ Changes from 1.4.1:
7
6
 
8
- === 1.4.0
7
+ * CGI script & webrick-based server are gone. Use rackup to start a
8
+ Sinatra app instead. HTTP API is completely new, which means you'll
9
+ need to change URLs in your RSS reader.
9
10
 
10
- Wed Apr 6 23:45:39 EEST 2011
11
+ * bwkfanboy_server is gone.
12
+
13
+ * Plugin interface API completely changed to work nicely with the
14
+ long-running Sinatra app.
11
15
 
12
- - A new plugin: articles (per-user) from inc.com.
16
+ This means you'll need to update all your personal plugins. I am very
17
+ sorry.
13
18
 
14
- - Removed quora plugin.
19
+ * bwkfanboy_fetch CL utility is gone. If you still need it, use curl
20
+ instead.
15
21
 
16
- - Fixed a but with '-t' command line option.
22
+ * bwkfanboy:
17
23
 
18
- === 1.3.2
24
+ - '-t' CL option is '-s' now;
19
25
 
20
- Mon Mar 28 12:13:31 EEST 2011
26
+ - output in '-i' is different;
21
27
 
22
- - Fixed a small bug with an Atom feed for plugins with Meta::URI options.
28
+ - '-p' is '--plugins-path'.
23
29
 
24
- - Updated quora plugin (a rather useless operation, because from [insert
25
- date] Quora provides its own answers-feed). This plugin will be
26
- removed.
30
+ * bwkfanboy_parse emits MessagePack instead of JSON. (Who cares?)
27
31
 
28
- === 1.3.1
32
+ * Log files in /tmp are gone. If you need logs from the Sinatra app,
33
+ comment in etc/sinatra.rb a monkey patch for rack.
29
34
 
30
- Thu Mar 17 17:19:52 EET 2011
31
-
32
- - Updated quora plugin.
33
-
34
- - Updated 1 test for jsonschema 2.0.1.
35
-
36
- - Updated the list of dependencies.
37
-
38
- === 1.3.0
39
-
40
- Thu Feb 10 11:17:57 EET 2011
41
-
42
- - A new plugin: articles form econlib.org.
43
-
44
- - (internal) Injected a small part of falsework's naive template.
45
-
46
- === 1.2.9
47
-
48
- - Reverted to rubygems 1.3.7 due to mysterious 1.5.0 behaviour.
49
-
50
- === 1.2.8
51
-
52
- - Updated quora plugin for nodejs 0.3.7.
53
-
54
- === 1.2.7
55
-
56
- - Fixed a bug with JavaScript parsing in quora plugin.
57
-
58
- === 1.2.5
59
-
60
- - Added '-t' CL option for bwkfanboy util.
61
-
62
- - Updated quora plugin.
63
-
64
- === 1.1.4
65
-
66
- - INCOMPATIBILITY: from now on, all plugins must do NOT read the stdin
67
- but bwkfanboy's provided stream. See doc/plugins.rdoc.
68
-
69
- - Moved the code from bwkfanboy_* to libraries.
70
-
71
- - From now on, bwkfanboy util by default do not run utils in a pipe but
72
- uses libraries directly. One can restore the old functionality with
73
- '-O' CL swith.
74
-
75
- === 0.1.4
76
-
77
- - The minimum required Ruby version is 1.9.2.
78
- - bwkfanboy_server now inserts a Last-Modified header.
79
- - Fixed (?) a bug in bwkfanboy_generate with external encoding.
80
- - Updated tests for Ruby 1.9.2.
81
-
82
- === 0.1.3
83
-
84
- - Updated Quora plugin.
85
- - Added '-V' command line option to all utils.
86
- - Updated tests.
87
-
88
- === 0.1.1
89
-
90
- - Plugins can have user-supplied options in realtime.
91
- - Added quora.rb plugin.
92
- - Fixed some typos.
@@ -1,88 +1,81 @@
1
1
  = How to Write a \Plugin
2
2
 
3
- First of all, look at examples provided with bwkfanboy. They were
4
- intended to be 100% working because I was writing them for myself.
3
+ First of all, look at examples included with bwkfanboy.
5
4
 
6
- Basically, all you need is to write a class named _Page_ that inherits
7
- class Bwkfanboy::Parse, override in the child \#myparse method and write
8
- a simple module named _Meta_ inside your _Page_ class.
5
+ Basically, all you need is to write 1 method (function) called +parse+
6
+ and set several instance variables.
9
7
 
10
8
  == Skeleton
11
9
 
12
- Here is a skeleton of a plugin:
13
-
14
- require 'nokogiri'
15
-
16
- class Page < Bwkfanboy::Parse
17
- module Meta
18
- URI = 'http://example.org/news'
19
- ENC = 'UTF-8'
20
- VERSION = 1
21
- COPYRIGHT = '(c) 2010 John Doe'
22
- TITLE = "News from example.org"
23
- CONTENT_TYPE = 'html'
24
- end
25
-
26
- def myparse(stream)
27
- # read 'stream' IO object and parse it
28
- doc = Nokogiri::HTML(stream, nil, Meta::ENC)
29
- doc.xpath("XPATH QUERY").each {|i|
30
- t = clean(i.xpath("XPATH QUERY").text())
31
- l = clean(i.xpath("XPATH QUERY").text())
32
- u = date(i.xpath("XPATH QUERY").text())
33
- a = clean(i.xpath("XPATH QUERY").text())
34
- c = clean(i.xpath("XPATH QUERY").text())
35
-
36
- self << { title: t, link: l, updated: u, author: a, content: c }
10
+ Here is a simple plugin skeleton:
11
+
12
+ @uri << 'http://example.org/news'
13
+ @enc = 'UTF-8'
14
+ @version = 1
15
+ @copyright = '(c) 2012 John Doe'
16
+ @title = "News from example.org"
17
+ @content_type = 'html'
18
+
19
+ def parse streams
20
+ streams.each do |io|
21
+ doc = Nokogiri::HTML io, nil, @enc
22
+ doc.xpath("XPATH QUERY").each {|idx|
23
+ t = idx.xpath("XPATH QUERY").text
24
+ l = idx.xpath("XPATH QUERY").text
25
+ u = BH.date idx.xpath("XPATH QUERY").text
26
+ a = idx.xpath("XPATH QUERY").text
27
+ c = idx.xpath("XPATH QUERY").text
28
+
29
+ self << { 'title' => t, 'link' => l, 'updated' => u,
30
+ 'author' => a, 'content' => c }
37
31
  }
38
32
  end
39
33
  end
40
34
 
35
+
41
36
  You can get the skeleton in the current directory by typing:
42
37
 
43
- % bwkfanboy -t myplugin.rb
38
+ % bwkfanboy -s myplugin
44
39
 
45
- As you see, we are using Nokogiri for HTML parsing. You are not
46
- required to use it too--take the parser whatever you like. Nokogiri
47
- is nice, because it's able to read a broken HTML and search thought
48
- it via XPath. If you would like to use, for example, REXML, beware
49
- that it loves only strict XML--you may need to clean the HTML with
50
- an external utility such as Tide.
40
+ As you see, we are using Nokogiri for HTML parsing. You are not required
41
+ to use it too--choose the parser whatever you like. Nokogiri is nice,
42
+ because it's able to read a broken HTML and search through it via
43
+ XPath. If you would like to use, for example, REXML, beware that it
44
+ loves only strict XML--you may need to clean the HTML with an external
45
+ utility such as Tidy.
51
46
 
52
- Bwkfanboy loads a plugin from 1 file as a valid Ruby code. It means
53
- that the plugin can contain *any* Ruby code, but doesn't mean that
54
- it should.
47
+ Bwkfanboy loads a plugin from 1 file as a valid Ruby code (via
48
+ <tt>instance_eval</tt>). This means that the plugin can contain *any*
49
+ Ruby code, but doesn't mean that it should.
55
50
 
56
51
  === \Meta
57
52
 
58
- Module _Meta_ can only have constants--and *all* constants listed in the
59
- skeleton are mandatory.
53
+ Several instance variables are required to be set.
60
54
 
61
- * <tt>URI</tt>--can be a <tt>http(s)://</tt> or <tt>ftp://</tt> URL
62
- or just a path to a file on your local machine, as
63
- <tt>/home/bob/huzza.html</tt>. This is the source that
64
- bwkfanboy will be transforming to the Atom feed.
55
+ * <tt>@uri</tt>--is an <b>array of strings</b> <tt>http(s)://</tt> or
56
+ <tt>ftp://</tt> URL or just a path to a file on your local machine, as
57
+ <tt>/home/bob/huzza.html</tt>. These are the sources that bwkfanboy will use
58
+ to create 1 Atom feed.
65
59
 
66
- * <tt>ENC</tt>--an encoding for URI.
60
+ * <tt>@enc</tt>--an encoding for *all* URIs in @uri.
67
61
 
68
- * <tt>VERSION</tt>--a version of a plugin.
62
+ * <tt>@version</tt>--a version of a plugin. Any unsigned integer.
69
63
 
70
- * <tt>COPYRIGHT</tt>--some boring string.
64
+ * <tt>@copyright</tt>--some boring string.
71
65
 
72
- * <tt>TITLE</tt>--a short description of the future feed. It'll be
73
- used later in the resulting XML.
66
+ * <tt>@title</tt>--a short description of the future feed.
74
67
 
75
- * <tt>CONTENT_TYPE</tt>--one of +xhtml+, +html+ or +text+ values. This is
76
- very important constant because it says in what format entries
77
- will be placed in the feed. Usually it's safe to use +html+.
68
+ * <tt>@content_type</tt>--one of +xhtml+, +html+ or +text+ values. This
69
+ is very important veriable because it says what format entries would
70
+ have in the feed. Usually it's safe to use +html+.
78
71
 
79
- === myparse
72
+ === parse
80
73
 
81
- In \#myparse method read 'stream' IO object. The contents of it is the
74
+ +parse+ method uses an array of IO objects. The contents of them is the
82
75
  raw HTML you want to parse. The general idea:
83
76
 
84
77
  * Atom feed must contain at least 1 entry, so look in the HTML for some
85
- crap which you must break into 5 peaces:
78
+ crap that you will break into 5 pieces:
86
79
 
87
80
  - a title of the entry
88
81
  - a link for it
@@ -93,7 +86,8 @@ raw HTML you want to parse. The general idea:
93
86
  * After you scan and grab 1 entry, create a hash and add it to
94
87
  _self_ as it was in the skeleton:
95
88
 
96
- self << { title: t, link: l, updated: u, author: a, content: c }
89
+ self << { 'title' => t, 'link' => l, 'updated' => u,
90
+ 'author' => a, 'content' => c }
97
91
 
98
92
  Here variables _t_, _l_, _u_, _a_ and _c_ contain the actual
99
93
  value of 5 pieces for the entry. Names of the keys in the hash are
@@ -102,10 +96,9 @@ raw HTML you want to parse. The general idea:
102
96
  * Probably there would be more crap in the HTML that you can use to
103
97
  construct another entry. Keep parsing and adding entries.
104
98
 
105
- * While you're scanning, use the 2 helper methods for cleaning each
106
- peace: \#clean, which removed duplicate spaces and \#date, which parses
107
- a string and return a date in ISO8601 format. You may override \#date
108
- method if you like.
99
+ * While you're scanning, use the <tt>BH.date</tt> helper method, which
100
+ parses a string and returns a date in ISO8601 format.
101
+
109
102
 
110
103
  === Options
111
104
 
@@ -116,40 +109,31 @@ it is silly to write a new plugin every time for a new
116
109
  participant. Instead, you can write 1 plugin which has an _option_ to
117
110
  take a parameter (a user name, in this case).
118
111
 
119
- Options (if any) are always accessible via \#opt method which is just an
120
- attr_reader of an array.
112
+ Options (if any) are always accessible via <tt>@opt</tt> variable which is an
113
+ array of strings.
121
114
 
122
- One can play the really interesting trick with Meta::URI constant. It is
115
+ One can play the really interesting trick with <tt>@uri</tt>. It is
123
116
  possible to make it dynamic, for example:
124
117
 
125
- URI = 'http://www.quora.com/#{opt[0]}/answers'
118
+ @opt.each {|i| @uri << 'http://www.inc.com/author/' + i }
126
119
 
127
- Then, if a user will provide 1 option (say 'Mark-Suster')--it will
120
+ Then, if a user will provide 1 option (say 'jason-fried')--it will
128
121
  appear in the final URI as follows:
129
122
 
130
- http://www.quora.com/Mark-Suster/answers
123
+ http://www.inc.com/author/jason-fried
131
124
 
132
- Such dynamic is possible only for Meta::URI constant and in such case,
133
- _option_ becomes mandatory for the end-user.
134
125
 
135
126
  == How to test all this
136
127
 
137
128
  To test how nice your plugin works, save the html page to the file
138
129
  and type:
139
130
 
140
- % bwkfanboy_parse -vv /path/to/the/plugin.rb < saved_page.html
141
-
142
- to see the result as in plain text, or
143
-
144
- % bwkfanboy_parse -v /path/to/the/plugin.rb < saved_page.html
145
-
146
- as a pretty JSON.
131
+ % bwkfanboy_parse -v plugin < saved_page.html
147
132
 
148
133
  For option-enabled plugins, supply additional parameters for them after
149
134
  the plugin path:
150
135
 
151
- % bwkfanboy_parse -vv /path/to/the/plugin.rb \
152
- option_1 "option 2" < saved_page.html
136
+ % bwkfanboy_parse -v plugin opt1 opt2 < saved_page.html
153
137
 
154
138
  <tt>bwkfanboy_parse</tt> returns 0 if no errors occurred or >= 1 if you
155
139
  have errors in your plugin code. N.B.: the output from