bwkfanboy 1.4.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +7 -0
  3. data/Gemfile.lock +51 -0
  4. data/Procfile +1 -0
  5. data/README.rdoc +40 -77
  6. data/Rakefile +13 -48
  7. data/bin/bwkfanboy +47 -166
  8. data/bin/bwkfanboy_generate +7 -19
  9. data/bin/bwkfanboy_parse +21 -17
  10. data/bwkfanboy.gemspec +40 -0
  11. data/config.ru +3 -0
  12. data/doc/NEWS.rdoc +21 -79
  13. data/doc/plugin.rdoc +63 -79
  14. data/etc/bwkfanboy.yaml +2 -0
  15. data/etc/sinatra.rb +34 -0
  16. data/lib/bwkfanboy/cliconfig.rb +141 -0
  17. data/lib/bwkfanboy/cliutils.rb +114 -0
  18. data/lib/bwkfanboy/fetch.rb +22 -24
  19. data/lib/bwkfanboy/generator.rb +78 -0
  20. data/lib/bwkfanboy/home.rb +53 -0
  21. data/lib/bwkfanboy/meta.rb +5 -2
  22. data/lib/bwkfanboy/plugin.rb +247 -0
  23. data/lib/bwkfanboy/plugin_skeleton.erb +19 -23
  24. data/lib/bwkfanboy/server.rb +73 -0
  25. data/lib/bwkfanboy/utils.rb +39 -129
  26. data/plugins/bwk.rb +25 -0
  27. data/plugins/econlib.rb +22 -0
  28. data/plugins/freebsd-ports-update.rb +73 -0
  29. data/plugins/inc.rb +29 -0
  30. data/plugins/test.rb +29 -0
  31. data/public/.gitattributes +1 -0
  32. data/public/favicon.ico +0 -0
  33. data/public/jquery-1.7.2.min.js +0 -0
  34. data/public/list.js +111 -0
  35. data/public/loading.gif +0 -0
  36. data/public/style.css +54 -0
  37. data/shotgun.rb +20 -0
  38. data/test/example/.gitattributes +1 -0
  39. data/test/example/.gitignore +1 -0
  40. data/test/example/02/plugins/bwk.html +0 -0
  41. data/test/{plugins → example/02/plugins}/empty.rb +0 -0
  42. data/test/example/02/plugins/garbage.rb +1 -0
  43. data/test/example/02/plugins/inc.html +0 -0
  44. data/test/helper.rb +30 -27
  45. data/test/helper_cliutils.rb +34 -0
  46. data/test/test_cli.rb +86 -0
  47. data/test/test_fetch.rb +49 -18
  48. data/test/test_generate.rb +43 -16
  49. data/test/test_home.rb +33 -0
  50. data/test/test_plugin.rb +141 -0
  51. data/test/test_server.rb +21 -32
  52. data/views/list.haml +38 -0
  53. metadata +223 -110
  54. data/bin/bwkfanboy_fetch +0 -13
  55. data/bin/bwkfanboy_server +0 -126
  56. data/doc/README.erb +0 -114
  57. data/doc/README.rdoc +0 -141
  58. data/doc/TODO +0 -7
  59. data/doc/bwkfanboy_fetch.rdoc +0 -4
  60. data/doc/bwkfanboy_generate.rdoc +0 -7
  61. data/doc/bwkfanboy_parse.rdoc +0 -7
  62. data/doc/bwkfanboy_server.rdoc +0 -35
  63. data/doc/rakefile.rb +0 -59
  64. data/lib/bwkfanboy/generate.rb +0 -63
  65. data/lib/bwkfanboy/parser.rb +0 -156
  66. data/lib/bwkfanboy/plugins/bwk.rb +0 -33
  67. data/lib/bwkfanboy/plugins/econlib.rb +0 -34
  68. data/lib/bwkfanboy/plugins/freebsd-ports-update.rb +0 -76
  69. data/lib/bwkfanboy/plugins/inc.rb +0 -37
  70. data/lib/bwkfanboy/schema.js +0 -39
  71. data/test/popen4.sh +0 -4
  72. data/test/rake_git.rb +0 -36
  73. data/test/semis/Rakefile +0 -35
  74. data/test/semis/bwk.html +0 -393
  75. data/test/semis/bwk.json +0 -82
  76. data/test/semis/econlib.html +0 -21
  77. data/test/semis/inc.html +0 -1067
  78. data/test/semis/links.txt +0 -4
  79. data/test/test_parse.rb +0 -27
  80. data/test/xml-clean.sh +0 -8
  81. data/web/bwkfanboy.cgi +0 -36
@@ -1,25 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
- # -*-ruby-*-
3
2
 
4
- require_relative '../lib/bwkfanboy/generate'
3
+ require_relative '../lib/bwkfanboy/cliutils'
4
+ require_relative '../lib/bwkfanboy/generator'
5
+ include Bwkfanboy
5
6
 
6
- $conf = {
7
- banner: "Usage: #{File.basename($0)} [options] < json",
8
- check: false
9
- }
10
-
11
- # we are expection the input ONLY in UTF-8
12
- Encoding.default_external = 'UTF-8'
13
-
14
- o = Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner])
15
- o.on('--check', 'Validate the input (slow!)') { |i| $conf[:check] = true }
16
- Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], o) # run cl parser
7
+ ### main
17
8
 
18
9
  begin
19
- j = JSON.parse(STDIN.read)
20
- rescue
21
- Bwkfanboy::Utils.errx(1, "stdin had invalid JSON");
10
+ puts Generator.atom(Generator.unpack $stdin)
11
+ rescue GeneratorException
12
+ CliUtils.errx EX_DATAERR, $!.to_s
22
13
  end
23
-
24
- if $conf[:check] then Bwkfanboy::Generate.validate(j) end
25
- puts Bwkfanboy::Generate.atom(j)
@@ -1,27 +1,31 @@
1
1
  #!/usr/bin/env ruby
2
- # -*-ruby-*-
3
2
 
4
- require_relative '../lib/bwkfanboy/parser'
3
+ require_relative '../lib/bwkfanboy/home'
4
+ require_relative '../lib/bwkfanboy/plugin'
5
+ include Bwkfanboy
5
6
 
6
- $conf = {
7
- banner: "Usage: #{File.basename($0)} [options] /path/to/my/plugin.rb < html"
7
+ ### main
8
+
9
+ home = Home.new {|loader, o|
10
+ loader.conf[:banner] = "#{File.basename $0} [options] plugin [opt1 opt2 ...] < html"
8
11
  }
9
12
 
10
- Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], nil, true)
13
+ CliUtils.errx EX_USAGE, home.conf[:banner] unless BH.all_set?(ARGV[0])
11
14
 
12
- if ARGV.size == 0 then
13
- abort($conf[:banner])
14
- else
15
- Bwkfanboy::Utils.plugin_load(ARGV[0], Bwkfanboy::Meta::PLUGIN_CLASS)
15
+ # load plugin
16
+ stream = [$stdin]
17
+ begin
18
+ p = Plugin.new home.conf[:plugins_path], ARGV[0], ARGV[1..-1] do |loader|
19
+ CliUtils.veputs 2, "'#{ARGV[0]}' plugin loaded from #{loader.origin}"
20
+ end
21
+ p.run_parser stream
22
+ rescue PluginException
23
+ CliUtils.errx EX_DATAERR, $!.to_s
16
24
  end
17
25
 
18
- opt = Bwkfanboy::Utils.plugin_opts(ARGV)
19
- pn = Page.new(opt)
20
- pn.check()
21
- pn.parse(STDIN)
22
-
23
- if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
24
- pn.dump()
26
+ # print output
27
+ if home.conf[:verbose] >= 1
28
+ pp p
25
29
  else
26
- puts pn.to_json()
30
+ p.pack $stdout
27
31
  end
@@ -0,0 +1,40 @@
1
+ # -*-ruby-*-
2
+ require File.expand_path('../lib/bwkfanboy/meta', __FILE__)
3
+ include Bwkfanboy
4
+
5
+ Gem::Specification.new do |gem|
6
+ gem.authors = [Meta::NAME]
7
+ gem.email = [Meta::EMAIL]
8
+ gem.description = 'A converter from a raw HTML to an Atom feed. You can use it to watch sites that do not provide its own feed'
9
+ gem.summary = gem.description + '.'
10
+ gem.homepage = Meta::HOMEPAGE
11
+
12
+ gem.files = `git ls-files`.split($\)
13
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
14
+ gem.test_files = gem.files.grep(%r{^test/test_.+\.rb})
15
+ gem.name = Meta::NAME
16
+ gem.version = Meta::VERSION
17
+
18
+ gem.required_ruby_version = '>= 1.9.2'
19
+ gem.extra_rdoc_files = gem.files.grep(%r{^doc/})
20
+ gem.rdoc_options << '-m' << 'README.rdoc'
21
+
22
+ gem.post_install_message = <<-MESSAGE
23
+ This version was rewritten from scratch.
24
+ Plugins from & for previous versions (0.x & 1.x) WILL NOT WORK.
25
+
26
+ See also doc/NEWS.rdoc file.
27
+ MESSAGE
28
+
29
+ gem.add_dependency "open4", "~> 1.3.0"
30
+ gem.add_dependency "msgpack", "~> 0.4.6"
31
+ gem.add_dependency "rake", "~> 0.9.2.2"
32
+ gem.add_dependency "nokogiri", "~> 1.5.2"
33
+ gem.add_dependency "sinatra", "~> 1.3.2"
34
+ gem.add_dependency "haml", "~> 3.1.4"
35
+ gem.add_dependency "rdoc", "~> 3.12"
36
+
37
+ gem.add_development_dependency "minitest", "~> 2.12.1"
38
+ gem.add_development_dependency "fakefs", "~> 0.4.0"
39
+ gem.add_development_dependency "rack-test", "~> 0.6.1"
40
+ end
@@ -0,0 +1,3 @@
1
+ require './lib/bwkfanboy/server'
2
+ run Bwkfanboy::MyApp
3
+
@@ -1,92 +1,34 @@
1
- === 1.4.1
1
+ === 2.0.0
2
2
 
3
- Wed Apr 20 21:12:16 EEST 2011
3
+ Mon Apr 23 07:28:52 EEST 2012
4
4
 
5
- - Repackage under RVM due to a strange bug (in native FreeBSD ruby +
6
- rubygems 1.7.2) with a date in the generated gem.
5
+ Changes from 1.4.1:
7
6
 
8
- === 1.4.0
7
+ * CGI script & webrick-based server are gone. Use rackup to start a
8
+ Sinatra app instead. HTTP API is completely new, which means you'll
9
+ need to change URLs in your RSS reader.
9
10
 
10
- Wed Apr 6 23:45:39 EEST 2011
11
+ * bwkfanboy_server is gone.
12
+
13
+ * Plugin interface API completely changed to work nicely with the
14
+ long-running Sinatra app.
11
15
 
12
- - A new plugin: articles (per-user) from inc.com.
16
+ This means you'll need to update all your personal plugins. I am very
17
+ sorry.
13
18
 
14
- - Removed quora plugin.
19
+ * bwkfanboy_fetch CL utility is gone. If you still need it, use curl
20
+ instead.
15
21
 
16
- - Fixed a but with '-t' command line option.
22
+ * bwkfanboy:
17
23
 
18
- === 1.3.2
24
+ - '-t' CL option is '-s' now;
19
25
 
20
- Mon Mar 28 12:13:31 EEST 2011
26
+ - output in '-i' is different;
21
27
 
22
- - Fixed a small bug with an Atom feed for plugins with Meta::URI options.
28
+ - '-p' is '--plugins-path'.
23
29
 
24
- - Updated quora plugin (a rather useless operation, because from [insert
25
- date] Quora provides its own answers-feed). This plugin will be
26
- removed.
30
+ * bwkfanboy_parse emits MessagePack instead of JSON. (Who cares?)
27
31
 
28
- === 1.3.1
32
+ * Log files in /tmp are gone. If you need logs from the Sinatra app,
33
+ comment in etc/sinatra.rb a monkey patch for rack.
29
34
 
30
- Thu Mar 17 17:19:52 EET 2011
31
-
32
- - Updated quora plugin.
33
-
34
- - Updated 1 test for jsonschema 2.0.1.
35
-
36
- - Updated the list of dependencies.
37
-
38
- === 1.3.0
39
-
40
- Thu Feb 10 11:17:57 EET 2011
41
-
42
- - A new plugin: articles form econlib.org.
43
-
44
- - (internal) Injected a small part of falsework's naive template.
45
-
46
- === 1.2.9
47
-
48
- - Reverted to rubygems 1.3.7 due to mysterious 1.5.0 behaviour.
49
-
50
- === 1.2.8
51
-
52
- - Updated quora plugin for nodejs 0.3.7.
53
-
54
- === 1.2.7
55
-
56
- - Fixed a bug with JavaScript parsing in quora plugin.
57
-
58
- === 1.2.5
59
-
60
- - Added '-t' CL option for bwkfanboy util.
61
-
62
- - Updated quora plugin.
63
-
64
- === 1.1.4
65
-
66
- - INCOMPATIBILITY: from now on, all plugins must do NOT read the stdin
67
- but bwkfanboy's provided stream. See doc/plugins.rdoc.
68
-
69
- - Moved the code from bwkfanboy_* to libraries.
70
-
71
- - From now on, bwkfanboy util by default do not run utils in a pipe but
72
- uses libraries directly. One can restore the old functionality with
73
- '-O' CL swith.
74
-
75
- === 0.1.4
76
-
77
- - The minimum required Ruby version is 1.9.2.
78
- - bwkfanboy_server now inserts a Last-Modified header.
79
- - Fixed (?) a bug in bwkfanboy_generate with external encoding.
80
- - Updated tests for Ruby 1.9.2.
81
-
82
- === 0.1.3
83
-
84
- - Updated Quora plugin.
85
- - Added '-V' command line option to all utils.
86
- - Updated tests.
87
-
88
- === 0.1.1
89
-
90
- - Plugins can have user-supplied options in realtime.
91
- - Added quora.rb plugin.
92
- - Fixed some typos.
@@ -1,88 +1,81 @@
1
1
  = How to Write a \Plugin
2
2
 
3
- First of all, look at examples provided with bwkfanboy. They were
4
- intended to be 100% working because I was writing them for myself.
3
+ First of all, look at examples included with bwkfanboy.
5
4
 
6
- Basically, all you need is to write a class named _Page_ that inherits
7
- class Bwkfanboy::Parse, override in the child \#myparse method and write
8
- a simple module named _Meta_ inside your _Page_ class.
5
+ Basically, all you need is to write 1 method (function) called +parse+
6
+ and set several instance variables.
9
7
 
10
8
  == Skeleton
11
9
 
12
- Here is a skeleton of a plugin:
13
-
14
- require 'nokogiri'
15
-
16
- class Page < Bwkfanboy::Parse
17
- module Meta
18
- URI = 'http://example.org/news'
19
- ENC = 'UTF-8'
20
- VERSION = 1
21
- COPYRIGHT = '(c) 2010 John Doe'
22
- TITLE = "News from example.org"
23
- CONTENT_TYPE = 'html'
24
- end
25
-
26
- def myparse(stream)
27
- # read 'stream' IO object and parse it
28
- doc = Nokogiri::HTML(stream, nil, Meta::ENC)
29
- doc.xpath("XPATH QUERY").each {|i|
30
- t = clean(i.xpath("XPATH QUERY").text())
31
- l = clean(i.xpath("XPATH QUERY").text())
32
- u = date(i.xpath("XPATH QUERY").text())
33
- a = clean(i.xpath("XPATH QUERY").text())
34
- c = clean(i.xpath("XPATH QUERY").text())
35
-
36
- self << { title: t, link: l, updated: u, author: a, content: c }
10
+ Here is a simple plugin skeleton:
11
+
12
+ @uri << 'http://example.org/news'
13
+ @enc = 'UTF-8'
14
+ @version = 1
15
+ @copyright = '(c) 2012 John Doe'
16
+ @title = "News from example.org"
17
+ @content_type = 'html'
18
+
19
+ def parse streams
20
+ streams.each do |io|
21
+ doc = Nokogiri::HTML io, nil, @enc
22
+ doc.xpath("XPATH QUERY").each {|idx|
23
+ t = idx.xpath("XPATH QUERY").text
24
+ l = idx.xpath("XPATH QUERY").text
25
+ u = BH.date idx.xpath("XPATH QUERY").text
26
+ a = idx.xpath("XPATH QUERY").text
27
+ c = idx.xpath("XPATH QUERY").text
28
+
29
+ self << { 'title' => t, 'link' => l, 'updated' => u,
30
+ 'author' => a, 'content' => c }
37
31
  }
38
32
  end
39
33
  end
40
34
 
35
+
41
36
  You can get the skeleton in the current directory by typing:
42
37
 
43
- % bwkfanboy -t myplugin.rb
38
+ % bwkfanboy -s myplugin
44
39
 
45
- As you see, we are using Nokogiri for HTML parsing. You are not
46
- required to use it too--take the parser whatever you like. Nokogiri
47
- is nice, because it's able to read a broken HTML and search thought
48
- it via XPath. If you would like to use, for example, REXML, beware
49
- that it loves only strict XML--you may need to clean the HTML with
50
- an external utility such as Tide.
40
+ As you see, we are using Nokogiri for HTML parsing. You are not required
41
+ to use it too--choose the parser whatever you like. Nokogiri is nice,
42
+ because it's able to read a broken HTML and search through it via
43
+ XPath. If you would like to use, for example, REXML, beware that it
44
+ loves only strict XML--you may need to clean the HTML with an external
45
+ utility such as Tidy.
51
46
 
52
- Bwkfanboy loads a plugin from 1 file as a valid Ruby code. It means
53
- that the plugin can contain *any* Ruby code, but doesn't mean that
54
- it should.
47
+ Bwkfanboy loads a plugin from 1 file as a valid Ruby code (via
48
+ <tt>instance_eval</tt>). This means that the plugin can contain *any*
49
+ Ruby code, but doesn't mean that it should.
55
50
 
56
51
  === \Meta
57
52
 
58
- Module _Meta_ can only have constants--and *all* constants listed in the
59
- skeleton are mandatory.
53
+ Several instance variables are required to be set.
60
54
 
61
- * <tt>URI</tt>--can be a <tt>http(s)://</tt> or <tt>ftp://</tt> URL
62
- or just a path to a file on your local machine, as
63
- <tt>/home/bob/huzza.html</tt>. This is the source that
64
- bwkfanboy will be transforming to the Atom feed.
55
+ * <tt>@uri</tt>--is an <b>array of strings</b> <tt>http(s)://</tt> or
56
+ <tt>ftp://</tt> URL or just a path to a file on your local machine, as
57
+ <tt>/home/bob/huzza.html</tt>. These are the sources that bwkfanboy will use
58
+ to create 1 Atom feed.
65
59
 
66
- * <tt>ENC</tt>--an encoding for URI.
60
+ * <tt>@enc</tt>--an encoding for *all* URIs in @uri.
67
61
 
68
- * <tt>VERSION</tt>--a version of a plugin.
62
+ * <tt>@version</tt>--a version of a plugin. Any unsigned integer.
69
63
 
70
- * <tt>COPYRIGHT</tt>--some boring string.
64
+ * <tt>@copyright</tt>--some boring string.
71
65
 
72
- * <tt>TITLE</tt>--a short description of the future feed. It'll be
73
- used later in the resulting XML.
66
+ * <tt>@title</tt>--a short description of the future feed.
74
67
 
75
- * <tt>CONTENT_TYPE</tt>--one of +xhtml+, +html+ or +text+ values. This is
76
- very important constant because it says in what format entries
77
- will be placed in the feed. Usually it's safe to use +html+.
68
+ * <tt>@content_type</tt>--one of +xhtml+, +html+ or +text+ values. This
69
+ is very important veriable because it says what format entries would
70
+ have in the feed. Usually it's safe to use +html+.
78
71
 
79
- === myparse
72
+ === parse
80
73
 
81
- In \#myparse method read 'stream' IO object. The contents of it is the
74
+ +parse+ method uses an array of IO objects. The contents of them is the
82
75
  raw HTML you want to parse. The general idea:
83
76
 
84
77
  * Atom feed must contain at least 1 entry, so look in the HTML for some
85
- crap which you must break into 5 peaces:
78
+ crap that you will break into 5 pieces:
86
79
 
87
80
  - a title of the entry
88
81
  - a link for it
@@ -93,7 +86,8 @@ raw HTML you want to parse. The general idea:
93
86
  * After you scan and grab 1 entry, create a hash and add it to
94
87
  _self_ as it was in the skeleton:
95
88
 
96
- self << { title: t, link: l, updated: u, author: a, content: c }
89
+ self << { 'title' => t, 'link' => l, 'updated' => u,
90
+ 'author' => a, 'content' => c }
97
91
 
98
92
  Here variables _t_, _l_, _u_, _a_ and _c_ contains the actual
99
93
  value of 5 peaces for the entry. Names of the keys in the hash are
@@ -102,10 +96,9 @@ raw HTML you want to parse. The general idea:
102
96
  * Probably there would be more crap in the HTML that you can use to
103
97
  construct another entry. Keep parsing and adding entries.
104
98
 
105
- * While you're scanning, use the 2 helper methods for cleaning each
106
- peace: \#clean, which removed duplicate spaces and \#date, which parses
107
- a string and return a date in ISO8601 format. You may override \#date
108
- method if you like.
99
+ * While you're scanning, use the <tt>BH.date</tt> helper method, which
100
+ parses a string and returns a date in ISO8601 format.
101
+
109
102
 
110
103
  === Options
111
104
 
@@ -116,40 +109,31 @@ it is silly to write a new plugin every time for a new
116
109
  participant. Instead, you can write 1 plugin which have an _option_ to
117
110
  take a parameter (a user name, in this case).
118
111
 
119
- Options (if any) are always accessible via \#opt method which is just an
120
- attr_reader of an array.
112
+ Options (if any) are always accessible via <tt>@opt</tt> variable which is an
113
+ array of strings.
121
114
 
122
- One can play the really interesting trick with Meta::URI constant. It is
115
+ One can play the really interesting trick with <tt>@uri</tt>. It is
123
116
  possible to make it dynamic, for example:
124
117
 
125
- URI = 'http://www.quora.com/#{opt[0]}/answers'
118
+ @opt.each {|i| @uri << 'http://www.inc.com/author/' + i }
126
119
 
127
- Then, if a user will provide 1 option (say 'Mark-Suster')--it will
120
+ Then, if a user will provide 1 option (say 'jason-fried')--it will
128
121
  appear in the final URI as follows:
129
122
 
130
- http://www.quora.com/Mark-Suster/answers
123
+ http://www.inc.com/author/jason-fried
131
124
 
132
- Such dynamic is possible only for Meta::URI constant and in such case,
133
- _option_ becomes mandatory for the end-user.
134
125
 
135
126
  == How to test all this
136
127
 
137
128
  To test how nice your plugin works, save the html page to the file
138
129
  and type:
139
130
 
140
- % bwkfanboy_parse -vv /path/to/the/plugin.rb < saved_page.html
141
-
142
- to see the result as in plain text, or
143
-
144
- % bwkfanboy_parse -v /path/to/the/plugin.rb < saved_page.html
145
-
146
- as a pretty JSON.
131
+ % bwkfanboy_parse -v plugin < saved_page.html
147
132
 
148
133
  For option-enabled plugins, supply additional parameters for them after
149
134
  the plugin path:
150
135
 
151
- % bwkfanboy_parse -vv /path/to/the/plugin.rb \
152
- option_1 "option 2" < saved_page.html
136
+ % bwkfanboy_parse -v plugin opt1 opt2 < saved_page.html
153
137
 
154
138
  <tt>bwkfanboy_parse</tt> return 0 if no errors occurred or >= 1 if you
155
139
  have errors in your plugin code. N.B.: the output from