RubyGems - bwkfanboy - Versions diffs - 0.1.3 → 1.1.4 - Mend

bwkfanboy 0.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

data/README.rdoc +8 -4
data/Rakefile +9 -10
data/bin/bwkfanboy +30 -8
data/bin/bwkfanboy_fetch +6 -23
data/bin/bwkfanboy_generate +7 -62
data/bin/bwkfanboy_parse +3 -9
data/bin/bwkfanboy_server +9 -36
data/doc/NEWS.rdoc +16 -2
data/doc/README.rdoc +8 -4
data/doc/bwkfanboy_fetch.rdoc +4 -0
data/doc/bwkfanboy_generate.rdoc +7 -0
data/doc/bwkfanboy_parse.rdoc +7 -0
data/doc/bwkfanboy_server.rdoc +35 -0
data/doc/plugin.rdoc +32 -29
data/lib/bwkfanboy/fetch.rb +36 -0
data/lib/bwkfanboy/generate.rb +63 -0
data/lib/bwkfanboy/parser.rb +4 -4
data/lib/bwkfanboy/plugins/bwk.rb +3 -3
data/lib/bwkfanboy/plugins/freebsd-ports-update.rb +3 -3
data/lib/bwkfanboy/plugins/quora.rb +3 -3
data/lib/bwkfanboy/utils.rb +2 -2
data/test/test_fetch.rb +1 -4
data/test/test_generate.rb +0 -3
data/test/test_parse.rb +4 -7
data/test/test_server.rb +0 -3
data/test/ts_utils.rb +8 -0
metadata +30 -19
data/test/plugins/bwk.rb +0 -29

data/README.rdoc CHANGED Viewed

@@ -11,6 +11,10 @@ general assistance.
 than the whole gem on rubygems.org, so grab the source before
 struggling).
+Plugins from version 1.1.4 are *incompatible* with the 0.1.x
+series. Please reread the example of the skeleton plugin in
+doc/plugin.rdoc.
 = Architecture
 == Plugins
@@ -18,10 +22,10 @@ struggling).
 bwkfanboy comes with several plugins. One of them, for example, parses a
 search page of dailyprincetonian.com looking for bwk's articles.
-The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse
-parent, overriding 1 method.
+The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse parent,
+overriding 1 method.
-The plugins can be in the system
+Plugins can be in system
   `gem env gemdir`/gems/bwkfanboy-x.y.z/lib/bwkfanboy/plugins
@@ -89,7 +93,7 @@ There are 2 method to get an Atom feed via HTTP:
 2. Small *bwkfanboy_server* HTTP server. It can run from any user and
    thus is able to inherit env variables for discovering your HOME
-   directory. Read bin/bwkfanboy_server to know how to operate it.
+   directory. Read doc/bwkfanboy_server.rdoc to know how to operate it.
 = License
 :include: doc/LICENSE

data/Rakefile CHANGED Viewed

@@ -9,12 +9,12 @@ require 'rake/testtask'
 spec = Gem::Specification.new() {|i|
   i.name = "bwkfanboy"
   i.summary = 'A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.'
-  i.version = '0.1.3'
+  i.version = `bin/#{i.name} -V`
   i.author = 'Alexander Gromnitsky'
   i.email = 'alexander.gromnitsky@gmail.com'
-  i.homepage = 'http://github.com/gromnitsky/bwkfanboy'
+  i.homepage = "http://github.com/gromnitsky/#{i.name}"
   i.platform = Gem::Platform::RUBY
-  i.required_ruby_version = '>= 1.9'
+  i.required_ruby_version = '>= 1.9.2'
   i.files = FileList['lib/**/*', 'bin/*', 'doc/*', '[A-Z]*', 'test/**/*']
   i.executables = FileList['bin/*'].gsub(/^bin\//, '')
@@ -22,10 +22,10 @@ spec = Gem::Specification.new() {|i|
   i.test_files = FileList['test/test_*.rb']
-  i.rdoc_options << '-m' << 'Bwkfanboy' << '-x' << 'plugins'
-  i.extra_rdoc_files = FileList['bin/*', 'doc/*']
+  i.rdoc_options << '-m' << 'doc/README.rdoc' << '-x' << 'plugins'
+  i.extra_rdoc_files = FileList['doc/*']
-  i.add_dependency('activesupport', '>= 3.0.0')
+  i.add_dependency('activesupport', '>= 3.0.1')
   i.add_dependency('nokogiri', '>=  1.4.3')
   i.add_dependency('open4', '>=  1.0.1')
   i.add_dependency('jsonschema', '>= 2.0.0')
@@ -36,12 +36,11 @@ Rake::GemPackageTask.new(spec).define()
 task(default: %(repackage))
 Rake::RDocTask.new('doc') {|i|
-  i.main = "Bwkfanboy"
-  i.rdoc_files = FileList['doc/*', 'lib/**/*.rb', 'bin/*']
-  i.rdoc_files.exclude("lib/**/plugins", "test")
+  i.main = 'doc/README.rdoc'
+  i.rdoc_files = FileList['doc/*', 'lib/**/*.rb']
+  i.rdoc_files.exclude("lib/**/plugins")
 }
 Rake::TestTask.new() {|i|
   i.test_files = FileList['test/test_*.rb']
-  i.verbose = true
 }

data/bin/bwkfanboy CHANGED Viewed

@@ -1,4 +1,4 @@
-#!/usr/bin/env ruby19
+#!/usr/bin/env ruby
 # -*-ruby-*-
 # This program is executed by bin/bwkfanboy_server to do all dirty work:
@@ -11,13 +11,14 @@
 #
 #   % bwkfanboy -h
 #
-# to get some basic help & read about Bwkfanboy module.
+# to get some basic help--read about Bwkfanboy module.
 require 'shellwords'
 require_relative '../lib/bwkfanboy/parser'
 $conf = {
-  mode: 'pipe',
+  mode: 'fast',
+  debug: false,
   banner: "Usage: #{File.basename($0)} [options] plugin-name"
 }
@@ -87,7 +88,8 @@ o = Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner]) # create OptionParser object
 o.on('-i', 'Show some info about the plugin') { |i| $conf[:mode] = 'info' }
 o.on('-l', 'List all plugins') { |i| $conf[:mode] = 'list' }
 o.on('-p', 'List all plugins paths') { |i| $conf[:mode] = 'path' }
-o.on('-D', '(ignore this) Use URI_DEBUG const instead URI in plugins') { |i| $conf[:mode] = 'debug' }
+o.on('-O', '(ignore this) Execute all bwkfanboy_* utils in a pipe') { |i| $conf[:mode] = 'pipe' }
+o.on('-D', '(ignore this) Use URI_DEBUG const instead URI in plugins') { |i| $conf[:debug] = true }
 Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], o) # run cl parser
 plugin = Plugin.new(ARGV[0])
@@ -107,12 +109,11 @@ when 'info'
   plugin.load(opt).dump_info
 when 'version'
   puts Bwkfanboy::Meta::VERSION
-else
-  # A pipe mode
+when 'pipe'
   pn = plugin.load(opt)
   cmd = "./bwkfanboy_fetch | ./bwkfanboy_parse '#{plugin.path}' #{opt.size != 0 ? Shellwords.join(opt) : ''} | ./bwkfanboy_generate"
   if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
-    puts ($conf[:mode] != 'debug' ? pn.uri() : pn.class::Meta::URI_DEBUG)
+    puts (!$conf[:debug] ? pn.uri() : pn.class::Meta::URI_DEBUG)
     puts cmd
     exit 0
   end
@@ -121,10 +122,31 @@ else
   Dir.chdir(File.dirname(File.expand_path($0)))
   pipe = IO.popen(cmd, 'w+')
-  pipe.puts ($conf[:mode] != 'debug' ? pn.uri : pn.class::Meta::URI_DEBUG)
+  pipe.puts (!$conf[:debug] ? pn.uri : pn.class::Meta::URI_DEBUG)
   pipe.close_write
   while line = pipe.gets
     puts line
   end
   pipe.close
+else
+  # a 'new', faster way
+  pn = plugin.load(opt)
+  require_relative '../lib/bwkfanboy/fetch'
+  require_relative '../lib/bwkfanboy/generate'
+  # 1. fetch & parse
+  pn = Page.new(opt)
+  Bwkfanboy::Fetch.cat(!$conf[:debug] ? pn.uri() : pn.class::Meta::URI_DEBUG) {|stream|
+    pn.parse(stream)
+  }
+  if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
+    pn.dump()
+    exit 0
+  end
+  # 2.generate
+  puts Bwkfanboy::Generate.atom(JSON.parse(pn.to_json))
 end

data/bin/bwkfanboy_fetch CHANGED Viewed

@@ -1,30 +1,13 @@
-#!/usr/bin/env ruby19
+#!/usr/bin/env ruby
 # -*-ruby-*-
-# Read stdin for a URI or a full path to the local file, download it (or
-# read for the local file) and print to stdout.
-require 'open-uri'
-require_relative '../lib/bwkfanboy/utils'
+require_relative '../lib/bwkfanboy/fetch'
 $conf = { banner: "Usage: #{File.basename($0)} [options] < uri" }
 Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], nil, true)
-uri = gets.chomp()
-Bwkfanboy::Utils.veputs(1, "fetching #{uri}\n")
-begin
-  open(uri, "User-Agent" => Bwkfanboy::Meta::USER_AGENT) {|f|
-    if defined?(f.meta) && f.status[0] != '200' then
-      Bwkfanboy::Utils.errx(1, "cannot fetch #{uri} : HTTP responce: #{f.status[0]}")
-    end
-    Bwkfanboy::Utils.veputs(1, "charset = #{f.content_type_parse[1][1]}\n") if defined?(f.meta)
-    f.each_line {|i| puts i}
-  }
-rescue
-  # typically Errno::ENOENT
-  Bwkfanboy::Utils.errx(1, "cannot fetch: #{$!}");
-end
+uri = gets
+Bwkfanboy::Fetch.cat(uri) {|f|
+  f.each_line {|line| puts line }
+}

data/bin/bwkfanboy_generate CHANGED Viewed

@@ -1,24 +1,16 @@
-#!/usr/bin/env ruby19
+#!/usr/bin/env ruby
 # -*-ruby-*-
-# Read stdin for JSON, generate from it an Atom feed and print the
-# result to stdout in UTF-8.
-#
-# One can validate the JSON by providing '--check' command line option
-# (by default the validating is off).
-require 'rss/maker'
-require 'date'
-require 'json'
-require 'jsonschema'
-require_relative '../lib/bwkfanboy/utils'
+require_relative '../lib/bwkfanboy/generate'
 $conf = {
   banner: "Usage: #{File.basename($0)} [options] < json",
   check: false
 }
+# we are expecting the input ONLY in UTF-8
+Encoding.default_external = 'UTF-8'
 o = Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner])
 o.on('--check', 'Validate the input (slow!)') { |i| $conf[:check] = true }
 Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], o) # run cl parser
@@ -29,52 +21,5 @@ rescue
   Bwkfanboy::Utils.errx(1, "stdin had invalid JSON");
 end
-# validate the input
-schema = Bwkfanboy::Utils.gem_dir_system() + '/schema.js'
-if $conf[:check] then
-  begin
-    JSON::Schema.validate(j, JSON.parse(File.read(schema)))
-  rescue
-    Bwkfanboy::Utils.errx(1, "JSON validation with schema (#{schema}) failed");
-  end
-end
-feed = RSS::Maker.make("atom") { |maker|
-  maker.channel.id = j['channel']['id']
-  maker.channel.updated = j['channel']['updated']
-  maker.channel.author = j['channel']['author']
-  maker.channel.title = j['channel']['title']
-  maker.channel.links.new_link {|i|
-    i.href = j['channel']['link']
-    i.rel = 'alternate'
-    i.type = 'text/html' # eh
-  }
-  maker.items.do_sort = true
-  j['x_entries'].each { |i|
-    maker.items.new_item do |item|
-      item.links.new_link {|k|
-        k.href = i['link']
-        k.rel = 'alternate'
-        k.type = 'text/html' # only to make happy crappy pr2nntp gateway
-      }
-      item.title = i['title']
-      item.author = i['author']
-      item.updated = i['updated']
-      item.content.type = j['channel']['x_entries_content_type']
-      case item.content.type
-      when 'text'
-        item.content.content = i['content']
-      when 'html'
-        item.content.content = i['content']
-      else
-        item.content.xhtml = i['content']
-      end
-    end
-  }
-}
-puts feed
+if $conf[:check] then Bwkfanboy::Generate.validate(j) end
+puts Bwkfanboy::Generate.atom(j)

data/bin/bwkfanboy_parse CHANGED Viewed

@@ -1,12 +1,6 @@
-#!/usr/bin/env ruby19
+#!/usr/bin/env ruby
 # -*-ruby-*-
-# Take 1 command line parameter: a full path to a plugin.
-#
-# Read stdin for a HTML, parse it and print the result to stdout in JSON
-# format. If '-vv' command line parameters were given, output will be in
-# 'key: value' pairs and <em>not</em> in JSON.
 require_relative '../lib/bwkfanboy/parser'
 $conf = {
@@ -19,12 +13,12 @@ if ARGV.size == 0 then
   abort($conf[:banner])
 else
   Bwkfanboy::Utils.plugin_load(ARGV[0], Bwkfanboy::Meta::PLUGIN_CLASS)
-end;
+end
 opt = Bwkfanboy::Utils.plugin_opts(ARGV)
 pn = Page.new(opt)
 pn.check()
-pn.parse()
+pn.parse(STDIN)
 if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
   pn.dump()

data/bin/bwkfanboy_server CHANGED Viewed

@@ -1,42 +1,9 @@
-#!/usr/bin/env ruby19
+#!/usr/bin/env ruby
 # -*-ruby-*-
-# Start a HTTP server (by default on 127.0.0.1:9042). To get Atom feeds
-# from it, initiate GET request with URI
-#
-#   http://localhost:9042/?p=PLUGIN
-#
-# where +PLUGIN+ is a name of a bwkfanboy's plugin (without '.re' suffix).
-# If the plugin requires additional options you can specify them like:
-#
-#   http://localhost:9042/?p=PLUGIN&o=opt1%20%22opt2%20has%20spaces%22
-#
-# where <tt>opt1%20%22opt2%20has%20spaces%22</tt> is a encoded string
-# <tt>opt1 "opt2 has spaces"</tt>.
-#
-# To list all available plugins, point you browser to
-#
-#   http://localhost:9042/list
-#
-# The server is intended to run from a non-root user from
-# <tt>~/.login</tt> file. It can detach from a terminal if you give it
-# '-d' command line option.
-#
-# For other help, type:
-#
-#  bwkfanboy_server -h
-#
-# The server maintains 2 logs:
-#
-#   /tmp/bwkfanboy/USER/log/bwkfanboy_server.log
-#   /tmp/bwkfanboy/USER/log/bwkfanboy_server-access.log
-#
-# The file with a pid:
-#
-#   /tmp/bwkfanboy/USER/bwkfanboy_server.pid
 require 'shellwords'
 require 'webrick'
+require 'date'
 require_relative '../lib/bwkfanboy/utils'
 $conf = {
@@ -78,8 +45,14 @@ class FeedServlet < WEBrick::HTTPServlet::AbstractServlet # :nodoc: all
       if r[0] != 0 then
         raise WEBrick::HTTPStatus::InternalServerError.new("Errors in the pipeline:\n\n #{r[1]}")
       end
       res.body = r[2]
+      # search for <updated> tag and set Last-Modified header
+      if (m = r[2].match('<updated>(.+?)</updated>'))
+        res['Last-Modified'] = DateTime.parse(m.to_s).httpdate
+      end
     else
       raise WEBrick::HTTPStatus::InternalServerError.new("Parameter 'p' required")
     end

data/doc/NEWS.rdoc CHANGED Viewed

@@ -1,6 +1,20 @@
-=== Current
+=== 1.1.4
-- See git log.
+- INCOMPATIBILITY: from now on, all plugins must NOT read stdin but the
+  stream provided by bwkfanboy. See doc/plugin.rdoc.
+- Moved the code from bwkfanboy_* to libraries.
+- From now on, bwkfanboy util by default does not run utils in a pipe but
+  uses libraries directly. One can restore the old functionality with
+  the '-O' CL switch.
+=== 0.1.4
+- The minimum required Ruby version is 1.9.2.
+- bwkfanboy_server now inserts a Last-Modified header.
+- Fixed (?) a bug in bwkfanboy_generate with external encoding.
+- Updated tests for Ruby 1.9.2.
 === 0.1.3

data/doc/README.rdoc CHANGED Viewed

@@ -11,6 +11,10 @@ general assistance.
 than the whole gem on rubygems.org, so grab the source before
 struggling).
+Plugins from version 1.1.4 are *incompatible* with the 0.1.x
+series. Please reread the example of the skeleton plugin in
+doc/plugin.rdoc.
 = Architecture
 == Plugins
@@ -18,10 +22,10 @@ struggling).
 bwkfanboy comes with several plugins. One of them, for example, parses a
 search page of dailyprincetonian.com looking for bwk's articles.
-The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse
-parent, overriding 1 method.
+The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse parent,
+overriding 1 method.
-The plugins can be in the system
+Plugins can be in system
   `gem env gemdir`/gems/bwkfanboy-x.y.z/lib/bwkfanboy/plugins
@@ -89,7 +93,7 @@ There are 2 method to get an Atom feed via HTTP:
 2. Small *bwkfanboy_server* HTTP server. It can run from any user and
    thus is able to inherit env variables for discovering your HOME
-   directory. Read bin/bwkfanboy_server to know how to operate it.
+   directory. Read doc/bwkfanboy_server.rdoc to know how to operate it.
 = License
 :include: doc/LICENSE

data/doc/bwkfanboy_fetch.rdoc ADDED Viewed

@@ -0,0 +1,4 @@
+= bwkfanboy_fetch
+Read stdin for a URI or a full path to the local file, download it (or
+read from the local file) and print the result to stdout.

data/doc/bwkfanboy_generate.rdoc ADDED Viewed

@@ -0,0 +1,7 @@
+= bwkfanboy_generate
+Read stdin for JSON, generate from it an Atom feed and print the
+result to stdout in UTF-8.
+One can validate the JSON by providing '--check' command line option
+(by default the validating is off).

data/doc/bwkfanboy_parse.rdoc ADDED Viewed

@@ -0,0 +1,7 @@
+= bwkfanboy_parse
+Takes 1 command line parameter: a full path to a plugin.
+Reads stdin for a HTML, parses it and prints the result to stdout in
+JSON format. If '-vv' command line parameters were given, output will
+be in 'key: value' pairs and <em>not</em> in JSON.

data/doc/bwkfanboy_server.rdoc ADDED Viewed

@@ -0,0 +1,35 @@
+= bwkfanboy_server
+Start a HTTP server (by default on 127.0.0.1:9042). To get Atom feeds
+from it, initiate GET request with URI
+  http://localhost:9042/?p=PLUGIN
+where +PLUGIN+ is the name of a bwkfanboy plugin (without the '.rb' suffix).
+If the plugin requires additional options you can specify them like:
+  http://localhost:9042/?p=PLUGIN&o=opt1%20%22opt2%20has%20spaces%22
+where <tt>opt1%20%22opt2%20has%20spaces%22</tt> is an encoded string
+<tt>opt1 "opt2 has spaces"</tt>.
+To list all available plugins, point your browser to
+  http://localhost:9042/list
+The server is intended to run from a non-root user from
+<tt>~/.login</tt> file. It can detach from a terminal if you give it
+'-d' command line option.
+For other help, type:
+ bwkfanboy_server -h
+The server maintains 2 logs:
+  /tmp/bwkfanboy/USER/log/bwkfanboy_server.log
+  /tmp/bwkfanboy/USER/log/bwkfanboy_server-access.log
+The file with a pid:
+  /tmp/bwkfanboy/USER/bwkfanboy_server.pid

data/doc/plugin.rdoc CHANGED Viewed

@@ -1,12 +1,11 @@
-= HOWTO Write a \Plugin
+= How to Write a \Plugin
 First of all, look at examples provided with bwkfanboy. They were
 intended to be 100% working because I was writing them for myself.
-Basically, all you need is to write a class named _Page_ that
-inherits this class Bwkfanboy::Parse, override in the child #myparse
-method and write a simple module named _Meta_ inside your _Page_
-class.
+Basically, all you need is to write a class named _Page_ that inherits
+class Bwkfanboy::Parse, override in the child \#myparse method and write
+a simple module named _Meta_ inside your _Page_ class.
 == Skeleton
@@ -24,9 +23,9 @@ Here is a skeleton of a plugin:
       CONTENT_TYPE = 'html'
     end
-    def myparse()
-      # read stdin and parse it
-      doc = Nokogiri::HTML(STDIN, nil, Meta::ENC)
+    def myparse(stream)
+      # read 'stream' IO object and parse it
+      doc = Nokogiri::HTML(stream, nil, Meta::ENC)
       doc.xpath("XPATH QUERY").each {|i|
         t = clean(i.xpath("XPATH QUERY").text())
         l = clean(i.xpath("XPATH QUERY").text())
@@ -52,8 +51,8 @@ it should.
 === \Meta
-Module _Meta_ can have only constants--and *all* constants listed in
-the skeleton are required.
+Module _Meta_ can only have constants--and *all* constants listed in the
+skeleton are mandatory.
 * <tt>URI</tt>--can be a <tt>http(s)://</tt> or <tt>ftp://</tt> URL
   or just a path to a file on your local machine, as
@@ -75,13 +74,17 @@ the skeleton are required.
 === myparse
-In #myparse method please read stdin. The contends of it is the raw
-HTML you want to parse. The general idea:
+In \#myparse method read 'stream' IO object. The contents of it is the
+raw HTML you want to parse. The general idea:
-* Atom feed must contain at least 1 entry, so look in HTML for some
-  crap which you break into 5 peaces: title of the entry, link for
-  it, a date for the entry, who is author of the entry and its
-  contents.
+* Atom feed must contain at least 1 entry, so look in the HTML for some
+  crap which you must break into 5 pieces:
+  - a title of the entry
+  - a link for it
+  - a date for the entry
+  - who is the author of the entry and
+  - its contents.
 * After you scan and grab 1 entry, create a hash and add it to
   _self_ as it was in the skeleton:
@@ -89,20 +92,20 @@ HTML you want to parse. The general idea:
     self << { title: t, link: l, updated: u, author: a, content: c }
   Here variables _t_, _l_, _u_, _a_ and _c_ contains the actual
-  values of 5 peaces for the entry. Names of the keys in hash are
+  values of the 5 pieces for the entry. Names of the keys in the hash are
   important of course--don't invent your own.
-* There would be probably more crap in HTML that you can use to
+* Probably there would be more crap in the HTML that you can use to
   construct another entry. Keep parsing and adding entries.
-* While you scanning, use the 2 helper methods for cleaning each
-  peace: \#clean, which removed duplicate spaces and #date, which
-  parses a sting and return a date in ISO8601 format. You may
-  override #date method if you like.
+* While you're scanning, use the 2 helper methods for cleaning each
+  piece: \#clean, which removes duplicate spaces and \#date, which parses
+  a string and returns a date in ISO8601 format. You may override \#date
+  method if you like.
 === Options
-Plugins can have _options_ and a user should provide then to the plugin
+Plugins can have _options_ and a user should provide them to the plugin
 in the real-time. For example, say you're scraping a site where many
 users are wasting their time. If you want to watch for several of them
 it is silly to write a new plugin every time for a new
@@ -130,20 +133,20 @@ _option_ becomes mandatory for the end-user.
 To test how nice your plugin works, save the html page to the file
 and type:
-  % bwkfanboy_parse -vv path/to/a/plugin.rb < saved_page.html
+  % bwkfanboy_parse -vv /path/to/the/plugin.rb < saved_page.html
 to see the result as in plain text, or
-  % bwkfanboy_parse -v path/to/a/plugin.rb < saved_page.html
+  % bwkfanboy_parse -v /path/to/the/plugin.rb < saved_page.html
-as pretty JSON.
+as a pretty JSON.
 For option-enabled plugins, supply additional parameters for them after
 the plugin path:
-  % bwkfanboy_parse -vv path/to/a/plugin.rb \
-    option_1 "options 2" < saved_page.html
+  % bwkfanboy_parse -vv /path/to/the/plugin.rb \
+    option_1 "option 2" < saved_page.html
 <tt>bwkfanboy_parse</tt> return 0 if no errors occurred or >= 1 if you
 have errors in your plugin code. N.B.: the output from
-<tt>bwkparser_parse</tt> is always in UTF-8.
+<tt>bwkfanboy_parse</tt> *must* always be in UTF-8.

data/lib/bwkfanboy/fetch.rb ADDED Viewed

@@ -0,0 +1,36 @@
+require 'open-uri'
+require_relative 'utils'
+module Bwkfanboy
+  # Fetches a remote document or reads a local file.
+  class Fetch
+    # Open _uri_ with Kernel#open (open-uri is loaded, so _uri_ may be
+    # a URL or a path to a local file).
+    #
+    # If no block given, return contents of fetch'ed URI. Otherwise,
+    # execute the block with 1 parameter--stream--and return "".
+    #
+    # Failures are reported through Utils.errx with code 1.
+    def self.cat(uri)
+      # Work on a copy: chomp! would modify the caller's string, fail on
+      # a frozen one & raise NoMethodError for nil (which is what gets()
+      # hands us in bin/bwkfanboy_fetch when stdin is empty).
+      uri = uri.to_s.chomp
+      if uri.empty?
+        Bwkfanboy::Utils.errx(1, "cannot fetch: no URI given")
+        # presumably errx terminates the process; the return keeps us
+        # safe if it doesn't -- confirm in utils.rb
+        return ""
+      end
+      Bwkfanboy::Utils.veputs(1, "fetching #{uri}\n")
+      begin
+        open(uri, "User-Agent" => Bwkfanboy::Meta::USER_AGENT) {|f|
+          # only open-uri streams respond to #meta; a plain File doesn't
+          if defined?(f.meta) && f.status[0] != '200' then
+            Bwkfanboy::Utils.errx(1, "cannot fetch #{uri} : HTTP responce: #{f.status[0]}")
+          end
+          # NOTE(review): looks like this raises (and so aborts the fetch
+          # via the rescue below) when Content-Type has no parameters;
+          # OpenURI::Meta#charset may be a safer choice -- confirm.
+          Bwkfanboy::Utils.veputs(1, "charset = #{f.content_type_parse[1][1]}\n") if defined?(f.meta)
+          if block_given?
+            yield f
+          else
+            # returns from cat() itself, not just from the block
+            return f.read
+          end
+        }
+      rescue
+        # typically Errno::ENOENT
+        Bwkfanboy::Utils.errx(1, "cannot fetch: #{$!}");
+      end
+      return ""
+    end
+  end
+end

data/lib/bwkfanboy/generate.rb ADDED Viewed

@@ -0,0 +1,63 @@
+require 'rss/maker'
+require 'date'
+require 'json'
+require 'jsonschema'
+require_relative 'utils'
+module Bwkfanboy
+  # Turns the parsed JSON structure into an Atom feed.
+  class Generate
+    # Check _t_ against the schema.js that lives in the directory
+    # returned by Utils.gem_dir_system. Any failure is reported through
+    # Utils.errx with code 1.
+    def self.validate(t)
+      schema = Bwkfanboy::Utils.gem_dir_system() + '/schema.js'
+      JSON::Schema.validate(t, JSON.parse(File.read(schema)))
+    rescue
+      Bwkfanboy::Utils.errx(1, "JSON validation with schema (#{schema}) failed");
+    end
+    # Build an Atom feed from _src_ (a hash with 'channel' and
+    # 'x_entries' keys) and return the feed object made by RSS::Maker.
+    def self.atom(src)
+      RSS::Maker.make("atom") do |maker|
+        # feed-level metadata
+        maker.channel.id = src['channel']['id']
+        maker.channel.updated = src['channel']['updated']
+        maker.channel.author = src['channel']['author']
+        maker.channel.title = src['channel']['title']
+        maker.channel.links.new_link do |link|
+          link.href = src['channel']['link']
+          link.rel = 'alternate'
+          link.type = 'text/html' # eh
+        end
+        maker.items.do_sort = true
+        # one Atom entry per element of 'x_entries'
+        src['x_entries'].each do |entry|
+          maker.items.new_item do |item|
+            item.links.new_link do |link|
+              link.href = entry['link']
+              link.rel = 'alternate'
+              link.type = 'text/html' # only to make happy crappy pr2nntp gateway
+            end
+            item.title = entry['title']
+            item.author = entry['author']
+            item.updated = entry['updated']
+            item.content.type = src['channel']['x_entries_content_type']
+            # 'text' & 'html' go in as is, anything else is treated as xhtml
+            case item.content.type
+            when 'text', 'html'
+              item.content.content = entry['content']
+            else
+              item.content.xhtml = entry['content']
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/bwkfanboy/parser.rb CHANGED Viewed

@@ -8,7 +8,7 @@ module Bwkfanboy
   # :include: ../../doc/plugin.rdoc
   class Parse
-    ENTRIES_MAX = 64
+    ENTRIES_MAX = 128
     attr_reader :opt
@@ -18,10 +18,10 @@ module Bwkfanboy
     end
     # Invokes #myparse & checks if it has grabbed something.
-    def parse()
+    def parse(stream)
       @entries = []
       begin
-        myparse()
+        myparse(stream)
       rescue
         @entries = []
         Utils.errx(1, "parsing failed: #{$!}\n\nBacktrace:\n\n#{$!.backtrace.join("\n")}")
@@ -99,7 +99,7 @@ module Bwkfanboy
     protected
     # This *must* be overridden in the child.
-    def myparse()
+    def myparse(stream)
       raise "plugin isn't finished yet"
     end

data/lib/bwkfanboy/plugins/bwk.rb CHANGED Viewed

@@ -8,16 +8,16 @@ class Page < Bwkfanboy::Parse
     URI = 'http://www.dailyprincetonian.com/advanced_search/?author=Brian+Kernighan'
     URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/bwk.html'
     ENC = 'UTF-8'
-    VERSION = 1
+    VERSION = 2
     COPYRIGHT = "See bwkfanboy's LICENSE file"
     TITLE = "Brian Kernighan's articles from Daily Princetonian"
     CONTENT_TYPE = 'html'
   end
-  def myparse()
+  def myparse(stream)
     url = "http://www.dailyprincetonian.com"
-    doc = Nokogiri::HTML(STDIN, nil, Meta::ENC)
+    doc = Nokogiri::HTML(stream, nil, Meta::ENC)
     doc.xpath("//div[@class='article_item']").each {|i|
       t = clean(i.xpath("h2/a").children.text())
       fail 'unable to extract link' if (link = clean(i.xpath("h2/a")[0].attributes['href'].value()).empty?)

data/lib/bwkfanboy/plugins/freebsd-ports-update.rb CHANGED Viewed

@@ -5,7 +5,7 @@ class Page < Bwkfanboy::Parse
     URI = '/usr/ports/UPDATING'
     URI_DEBUG = URI
     ENC = 'ASCII'
-    VERSION = 1
+    VERSION = 2
     COPYRIGHT = "See bwkfanboy's LICENSE file"
     TITLE = "News from FreeBSD ports"
     CONTENT_TYPE = 'text'
@@ -24,7 +24,7 @@ class Page < Bwkfanboy::Parse
     return t
   end
-  def myparse()
+  def myparse(stream)
     re_u = /^(\d{8}):$/
     re_t1 = /^ {2}AFFECTS:\s+(.+)$/
     re_t2 = /^\s+(.+)$/
@@ -33,7 +33,7 @@ class Page < Bwkfanboy::Parse
     ready = false
     mode = nil
     t = l = u = a = c = nil
-    while line = STDIN.gets
+    while line = stream.gets
       line.rstrip!
       if line =~ re_u then

data/lib/bwkfanboy/plugins/quora.rb CHANGED Viewed

@@ -17,17 +17,17 @@ class Page < Bwkfanboy::Parse
     URI = 'http://www.quora.com/#{opt[0]}/answers'
     URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/quora.html'
     ENC = 'UTF-8'
-    VERSION = 4
+    VERSION = 5
     COPYRIGHT = "See bwkfanboy's LICENSE file"
     TITLE = "Last n answers (per-user) from Quora; requires nodejs"
     CONTENT_TYPE = 'html'
   end
-  def myparse()
+  def myparse(stream)
     profile = opt[0] # for example, 'Brandon-Smietana'
     # read stdin
-    doc = Nokogiri::HTML(STDIN, nil, Meta::ENC)
+    doc = Nokogiri::HTML(stream, nil, Meta::ENC)
     # extract & evaluate JavaScript into tstp
     tstp = nil

data/lib/bwkfanboy/utils.rb CHANGED Viewed

@@ -7,7 +7,7 @@ require 'active_support/core_ext/module/attribute_accessors'
 module Bwkfanboy
   module Meta
     NAME = 'bwkfanboy'
-    VERSION = '0.1.3'
+    VERSION = '1.1.4'
     USER_AGENT = "#{NAME}/#{VERSION} (#{RUBY_PLATFORM}; N; #{Encoding.default_external.name}; #{RUBY_ENGINE}; rv:#{RUBY_VERSION}.#{RUBY_PATCHLEVEL})"
     PLUGIN_CLASS = 'Page'
     DIR_TMP = "/tmp/#{Meta::NAME}/#{ENV['USER']}"
@@ -89,7 +89,7 @@ module Bwkfanboy
         # TODO get rid of eval()
         fail "class #{class_name} isn't defined" if (! eval("defined?#{class_name}") || ! eval(class_name).is_a?(Class) )
       rescue LoadError
-        errx(1, "cannot load plugin '#{path}'");
+        errx(1, "cannot load plugin '#{path}' #{$!}");
       rescue Exception
         errx(1, "plugin '#{path}' has errors: #{$!}\n\nBacktrace:\n\n#{$!.backtrace.join("\n")}")
       end

data/test/test_fetch.rb CHANGED Viewed

@@ -1,10 +1,7 @@
-#!/usr/bin/env ruby19
-require 'minitest/autorun'
 require 'digest/md5'
 require_relative '../lib/bwkfanboy/utils'
-require_relative 'ts_utils.rb'
+require_relative 'ts_utils'
 # TODO add HTTP 404 check; drop connection from server during HTTP 200
 # replay...

data/test/test_generate.rb CHANGED Viewed

@@ -1,6 +1,3 @@
-#!/usr/bin/env ruby19
-require 'minitest/autorun'
 require 'digest/md5'
 require_relative '../lib/bwkfanboy/utils'

data/test/test_parse.rb CHANGED Viewed

@@ -1,6 +1,3 @@
-#!/usr/bin/env ruby19
-require 'minitest/autorun'
 require 'digest/md5'
 require_relative '../lib/bwkfanboy/utils'
@@ -17,16 +14,16 @@ class TestParse < MiniTest::Unit::TestCase
   def test_empty_plugin
     cmd CMD
-    r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{@tpath}plugins/empty.rb ")
+    r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{Dir.pwd}/#{@tpath}plugins/empty.rb ")
     assert_equal(1, r[0])
     assert_match(/plugin .+ has errors: class Page isn't defined/, r[1])
   end
   def test_plugin_parse
     cmd CMD
-    r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{@tpath}plugins/bwk.rb < #{@tpath}semis/bwk.html")
+    r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{Dir.pwd}/#{@tpath}../lib/bwkfanboy/plugins/bwk.rb < #{@tpath}semis/bwk.html")
     assert_equal(0, r[0])
-    # bin/bwkfanboy_parse test/plugins/bwk.rb < test/semis/bwk.html | md5
-    assert_equal('371fb5a5c5b5519b5eff085df2d31e18', Digest::MD5.hexdigest(r[2]))
+    # bin/bwkfanboy_parse `pwd`/lib/bwkfanboy/plugins/bwk.rb < test/semis/bwk.html | md5
+    assert_equal('a433a4a27bafb060a41aa85a40808056', Digest::MD5.hexdigest(r[2]))
   end
 end

data/test/test_server.rb CHANGED Viewed

@@ -1,6 +1,3 @@
-#!/usr/bin/env ruby19
-require 'minitest/autorun'
 require 'open-uri'
 require 'digest/md5'

data/test/ts_utils.rb CHANGED Viewed

@@ -1,3 +1,11 @@
+# don't run test automatically
+# if they were invoked as 'gem check -t ...'
+if $0 =~ /gem/
+  require 'minitest/unit'
+else
+  require 'minitest/autorun'
+end
 # return the right directory for _c_
 def cmd(c)
   @tpath = ''

metadata CHANGED Viewed

@@ -3,10 +3,10 @@ name: bwkfanboy
 version: !ruby/object:Gem::Version
   prerelease: false
   segments:
-  - 0
   - 1
-  - 3
-  version: 0.1.3
+  - 1
+  - 4
+  version: 1.1.4
 platform: ruby
 authors:
 - Alexander Gromnitsky
@@ -14,27 +14,29 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-10-29 00:00:00 +03:00
+date: 2010-11-08 00:00:00 +02:00
 default_executable: bwkfanboy
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
   prerelease: false
   requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         segments:
         - 3
         - 0
-        - 0
-        version: 3.0.0
+        - 1
+        version: 3.0.1
   type: :runtime
   version_requirements: *id001
 - !ruby/object:Gem::Dependency
   name: nokogiri
   prerelease: false
   requirement: &id002 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
@@ -49,6 +51,7 @@ dependencies:
   name: open4
   prerelease: false
   requirement: &id003 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
@@ -63,6 +66,7 @@ dependencies:
   name: jsonschema
   prerelease: false
   requirement: &id004 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
@@ -84,15 +88,14 @@ executables:
 extensions: []
 extra_rdoc_files:
-- bin/bwkfanboy_generate
-- bin/bwkfanboy_parse
-- bin/bwkfanboy
-- bin/bwkfanboy_server
-- bin/bwkfanboy_fetch
-- doc/plugin.rdoc
-- doc/README.rdoc
 - doc/LICENSE
 - doc/NEWS.rdoc
+- doc/README.rdoc
+- doc/plugin.rdoc
+- doc/bwkfanboy_fetch.rdoc
+- doc/bwkfanboy_generate.rdoc
+- doc/bwkfanboy_parse.rdoc
+- doc/bwkfanboy_server.rdoc
 files:
 - lib/bwkfanboy/plugins/bwk.rb
 - lib/bwkfanboy/plugins/freebsd-ports-update.rb
@@ -101,20 +104,25 @@ files:
 - lib/bwkfanboy/parser.rb
 - lib/bwkfanboy/utils.rb
 - lib/bwkfanboy/schema.js
+- lib/bwkfanboy/fetch.rb
+- lib/bwkfanboy/generate.rb
 - bin/bwkfanboy_generate
 - bin/bwkfanboy_parse
 - bin/bwkfanboy
 - bin/bwkfanboy_server
 - bin/bwkfanboy_fetch
-- doc/plugin.rdoc
-- doc/README.rdoc
 - doc/LICENSE
 - doc/NEWS.rdoc
+- doc/README.rdoc
+- doc/plugin.rdoc
+- doc/bwkfanboy_fetch.rdoc
+- doc/bwkfanboy_generate.rdoc
+- doc/bwkfanboy_parse.rdoc
+- doc/bwkfanboy_server.rdoc
 - README.rdoc
 - Rakefile
 - TODO
 - test/plugins/empty.rb
-- test/plugins/bwk.rb
 - test/semis/bwk.html
 - test/semis/bwk.json
 - test/semis/quora.html
@@ -134,20 +142,23 @@ licenses: []
 post_install_message:
 rdoc_options:
 - -m
-- Bwkfanboy
+- doc/README.rdoc
 - -x
 - plugins
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       segments:
       - 1
       - 9
-      version: "1.9"
+      - 2
+      version: 1.9.2
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
@@ -157,7 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project:
-rubygems_version: 1.3.6
+rubygems_version: 1.3.7
 signing_key:
 specification_version: 3
 summary: A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.

data/test/plugins/bwk.rb DELETED Viewed

@@ -1,29 +0,0 @@
-require 'nokogiri'
-class Page < Bwkfanboy::Parse
-  module Meta
-    URI = "html/bwk.html"
-    ENC = 'UTF-8'
-    VERSION = 1
-    COPYRIGHT = '(c) 2010 Alexander Gromnitsky'
-    TITLE = "Brian Kernighan's articles from Daily Princetonian"
-    CONTENT_TYPE = 'html'
-  end
-  def myparse()
-    url = "http://www.dailyprincetonian.com"
-    doc = Nokogiri::HTML(STDIN, nil, Meta::ENC)
-    doc.xpath("//div[@class='article_item']").each {|i|
-      t = clean(i.xpath("h2/a").children.text())
-      fail 'unable to extract link' if (link = clean(i.xpath("h2/a")[0].attributes['href'].value()).empty?)
-      link = clean(i.xpath("h2/a")[0].attributes['href'].value())
-      l = url + link + "print"
-      u = date(i.xpath("h2").children[1].text())
-      a = clean(i.xpath("div/span/a[1]").children.text())
-      c = clean(i.xpath("div[@class='summary']").text())
-      self << { title: t, link: l, updated: u, author: a, content: c }
-    }
-  end
-end