RubyGems - open_events - Versions diffs - 0.0.1 - Mend

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

data/.gitignore ADDED Viewed

	@@ -0,0 +1 @@
1	+ html_cache/

data/README ADDED Viewed

@@ -0,0 +1,3 @@
+# Open Events
+Description coming

data/Rakefile ADDED Viewed

@@ -0,0 +1,7 @@
+# Rake entry point for the gem: loads Rake and Bundler, registers Bundler's
+# gem helper tasks, then makes this project's lib/ directory requirable.
+require 'rake'
+require 'rake/testtask'
+require 'bundler'
+Bundler::GemHelper.install_tasks
+# lib/ is resolved relative to this Rakefile, not the current directory.
+lib_dir = File.join(File.dirname(__FILE__), 'lib')
+$LOAD_PATH.unshift(lib_dir)

data/boston/booksmith.rb ADDED Viewed

@@ -0,0 +1,39 @@
+require 'event_scraper'
+# Scraper for the Brookline Booksmith events page.
+class Booksmith < EventScraper
+  # Event links point at the shop site's /event/ path. Anchored with \A and
+  # written with literal dots so only that exact URL prefix matches.
+  EVENT_LINK = %r{\Ahttp://www\.brooklinebooksmith-shop\.com/event/}
+  # Venue metadata. :url is the page whose HTML gets fetched and parsed.
+  def about
+    {
+      title: 'Brookline Booksmith',
+      url: 'http://www.brooklinebooksmith.com/events/mainevent.html',
+      categories: %w(books speakers),
+      locations: %w(coolidge-corner)
+    }
+  end
+  # Returns the <tr> elements that enclose an event link found inside a
+  # <strong> element.
+  def nodes
+    links = doc.search('strong a').select { |a| a['href'] =~ EVENT_LINK }
+    # A link without an enclosing <tr> yields nil; drop it so #event is never
+    # handed nil.
+    links.map { |a| a.ancestors.find { |anc| anc.name == 'tr' } }.compact
+  end
+  # Returns the raw inner HTML of the row +n+.
+  #
+  # TODO: build a structured hash (date, time, title, link) for this venue.
+  # The draft that used to follow an early `return` here was unreachable and
+  # reused the .entry-meta/.entry-title selectors of another scraper, so it
+  # has been removed rather than left as dead code.
+  def event(n)
+    n.inner_html
+  end
+end

data/boston/brattle.rb ADDED Viewed

@@ -0,0 +1,34 @@
+require 'event_scraper'
+# Scraper for the Brattle Theater special-events calendar.
+class Brattle < EventScraper
+  # Venue metadata. :url is the page whose HTML gets fetched and parsed.
+  def about
+    {
+      title: 'Brattle Theater Special Events',
+      url: 'http://brattlefilm.org/category/calendar-2/special-events/',
+      categories: %w[movies],
+      locations: %w[harvard-square]
+    }
+  end
+  # Returns the direct <div> children of the #calendarframe element.
+  def nodes
+    frame = doc.at('#calendarframe')
+    frame.xpath('./div')
+  end
+  # Builds the event hash for one calendar node +n+.
+  # A node without its own .entry-meta reuses the date of the event parsed
+  # just before it (read from @res, which #parse fills incrementally).
+  def event(n)
+    meta = n.at('.entry-meta')
+    date = meta ? meta.inner_text : (@res.last && @res.last[:date])
+    time_node = n.at('li/text()')
+    time = time_node && time_node.text.strip
+    link = n.at('.entry-title a')['href']
+    title = n.at('.entry-title').inner_text
+    { date: date, time: time, title: title, link: link }
+  end
+end

data/lib/cached_html.rb ADDED Viewed

@@ -0,0 +1,27 @@
+require 'open-uri'
+# Mixin that downloads a page once and serves later requests for the same URL
+# from a file under CACHE_DIR (relative to the current working directory).
+module CachedHtml
+  CACHE_DIR = 'html_cache'
+  # Create the cache directory at load time without shelling out to `mkdir`.
+  Dir.mkdir(CACHE_DIR) unless File.directory?(CACHE_DIR)
+  # Returns the path of the cache file used for +url+.
+  def filename(url)
+    File.join(CACHE_DIR, munge(url))
+  end
+  # Flattens +url+ into a file name: drops a leading "http://" and one
+  # trailing non-word character, then turns every remaining non-word
+  # character (including "/" and ".") into "-".
+  def munge(url)
+    url.sub('http://', '').sub(/\W$/, '').gsub(/\W/, '-')
+  end
+  # Returns the HTML for +url+, reading the cache file when it exists and is
+  # non-empty, otherwise fetching the page and writing it to the cache.
+  def cached_html(url)
+    path = filename(url)
+    return File.read(path) if File.size?(path)
+    puts "Fetching #{url}"
+    # Kernel#open no longer opens URLs on Ruby 3.0+, so prefer URI.open when
+    # open-uri provides it. The block form closes the response IO.
+    html = if URI.respond_to?(:open)
+             URI.open(url) { |io| io.read }
+           else
+             open(url) { |io| io.read }
+           end
+    File.open(path, 'w') { |f| f.write html }
+    puts "Cached html to #{path}"
+    html
+  end
+end

data/lib/event_scraper.rb ADDED Viewed

@@ -0,0 +1,33 @@
+require 'cached_html'
+require 'nokogiri'
+# Base class for venue scrapers. Subclasses supply #about (a hash with at
+# least :url), #nodes (the elements to walk) and #event (one node -> result).
+class EventScraper
+  include CachedHtml
+  attr_accessor :doc
+  # Parses the venue page into a Nokogiri document as soon as the scraper is
+  # built.
+  def initialize
+    @doc = Nokogiri::HTML.parse(html)
+  end
+  # Returns the page HTML for about[:url]; the result is memoized.
+  def html
+    return @html if @html
+    @html = cached_html(about[:url])
+  end
+  # Runs #event over every node and returns the collected results.
+  # @res grows one entry at a time so #event can look at earlier results.
+  def parse
+    @res = []
+    nodes.each { |node| @res.push(event(node)) }
+    @res
+  end
+  # Debug helper: prints each node's HTML and its whitespace-collapsed text,
+  # followed by a separator line.
+  def parse_test
+    separator = '-' * 80
+    nodes.each do |node|
+      puts node.inner_html
+      puts node.inner_text.gsub(/\s{2,}/, ' ').strip
+      puts separator
+    end
+  end
+end

data/lib/runner.rb ADDED Viewed

@@ -0,0 +1,29 @@
+$LOAD_PATH.unshift('scrapers')
+# TODO: use optparse to output HTML fragments while developing scrapers
+# Loads one venue scraper by name, runs it, and prints its metadata and the
+# parsed events.
+class Runner
+  # venue - feature name handed to `require` (e.g. "booksmith"); the scraper
+  #         class name is derived from it with #camelize.
+  def initialize(venue)
+    require venue
+    klass = camelize(venue)
+    puts klass
+    parser = resolve_constant(klass).new
+    puts parser.about.inspect
+    res = parser.parse
+    puts res
+  end
+  # from active support
+  # Converts "foo_bar" to "FooBar" and "foo/bar" to "Foo::Bar". With
+  # first_letter_in_uppercase false, the first character is lower-cased.
+  def camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
+    word = lower_case_and_underscored_word.to_s
+    if first_letter_in_uppercase
+      namespaced = word.gsub(/\/(.?)/) { "::#{Regexp.last_match(1).upcase}" }
+      namespaced.gsub(/(?:^|_)(.)/) { Regexp.last_match(1).upcase }
+    else
+      word[0].chr.downcase + camelize(lower_case_and_underscored_word)[1..-1]
+    end
+  end
+  private
+  # Looks up a possibly namespaced constant ("A::B") one segment at a time.
+  # Object.const_get("A::B") is rejected by Ruby 1.9, which this gem still
+  # claims to support, while #camelize produces such names for path-like venues.
+  def resolve_constant(name)
+    name.split('::').reject(&:empty?).inject(Object) { |scope, part| scope.const_get(part) }
+  end
+end
+# Command-line entry point: ruby runner.rb VENUE
+if __FILE__ == $0
+  # Without an argument `require nil` would fail with a bare TypeError, so
+  # stop early with a usage message instead.
+  abort "Usage: ruby #{File.basename(__FILE__)} VENUE" if ARGV.empty?
+  Runner.new(ARGV.first)
+end

data/notes.txt ADDED Viewed

@@ -0,0 +1,9 @@
+A curated simple event scraper for select Cambridge, MA places.
+Plus Sinatra app.
+Develop most of the scrapers on the command line, then develop the Sinatra
+app.

data/open_events.gemspec ADDED Viewed

@@ -0,0 +1,25 @@
+# -*- encoding: utf-8 -*-
+# Gem specification for open_events. The file lists are taken from
+# `git ls-files`, so the gem must be built from inside a git checkout.
+$:.push File.expand_path("../lib", __FILE__)
+#require "open_events/version"
+Gem::Specification.new do |s|
+  s.name        = "open_events"
+  s.version     = "0.0.1"
+  s.platform    = Gem::Platform::RUBY
+  s.required_ruby_version = '>= 1.9.0'
+  s.authors     = ["Daniel Choi"]
+  s.email       = ["dhchoi@gmail.com"]
+  #s.homepage    = "http://danielchoi.com/software/open_events.html"
+  s.summary     = %q{Events listings web scrapers and tools}
+  s.description = %q{An open-source repository of events listings web scrapers and tools}
+  # rubyforge_project is deprecated in newer RubyGems releases; only set it
+  # where the writer still exists so the spec keeps loading there.
+  s.rubyforge_project = "open_events" if s.respond_to?(:rubyforge_project=)
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  # Executables are listed by base name; RubyGems resolves them under bin/.
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  s.require_paths = ["lib"]
+  s.add_dependency 'nokogiri'
+end

metadata ADDED Viewed

@@ -0,0 +1,76 @@
+--- !ruby/object:Gem::Specification
+name: open_events
+version: !ruby/object:Gem::Version
+  prerelease:
+  version: 0.0.1
+platform: ruby
+authors:
+- Daniel Choi
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-04-18 00:00:00 -04:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0"
+  type: :runtime
+  version_requirements: *id001
+description: An open-source repository of events listings web scrapers and tools
+email:
+- dhchoi@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- README
+- Rakefile
+- boston/booksmith.rb
+- boston/brattle.rb
+- lib/cached_html.rb
+- lib/event_scraper.rb
+- lib/runner.rb
+- notes.txt
+- open_events.gemspec
+has_rdoc: true
+homepage:
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: 1.9.0
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+requirements: []
+rubyforge_project: open_events
+rubygems_version: 1.6.1
+signing_key:
+specification_version: 3
+summary: Events listings web scrapers and tools
+test_files: []

open_events 0.0.1