pismo 0.2.3 → 0.4.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/lib/pismo.rb CHANGED
@@ -4,6 +4,7 @@ require 'open-uri'
4
4
  require 'nokogiri'
5
5
  require 'fast_stemmer'
6
6
  require 'chronic'
7
+ require 'tempfile'
7
8
 
8
9
  $: << File.dirname(__FILE__)
9
10
  require 'pismo/document'
@@ -28,8 +29,9 @@ class Nokogiri::HTML::Document
28
29
  self.search(search).first rescue nil
29
30
  end
30
31
 
31
- def match(*queries)
32
- queries.each do |query|
32
+ def match(queries = [], all = false)
33
+ r = [] if all
34
+ [*queries].each do |query|
33
35
  if query.is_a?(String)
34
36
  result = self.search(query).first.inner_text.strip rescue nil
35
37
  elsif query.is_a?(Array)
@@ -41,9 +43,13 @@ class Nokogiri::HTML::Document
41
43
  # result.gsub!(/\342\200\224/, '-')
42
44
  result.gsub!('’', '\'')
43
45
  result.gsub!('—', '-')
44
- return result
46
+ if all
47
+ r << result
48
+ else
49
+ return result
50
+ end
45
51
  end
46
52
  end
47
- return nil
53
+ all && !r.empty? ? r : nil
48
54
  end
49
55
  end
data/pismo.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{pismo}
8
- s.version = "0.2.3"
8
+ s.version = "0.4.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Peter Cooper"]
12
- s.date = %q{2010-05-03}
12
+ s.date = %q{2010-05-15}
13
13
  s.default_executable = %q{pismo}
14
14
  s.description = %q{Pismo extracts and retrieves content-related metadata from HTML pages - you can use the resulting data in an organized way, such as a summary/first paragraph, body text, keywords, RSS feed URL, favicon, etc.}
15
15
  s.email = %q{git@peterc.org}
@@ -2,6 +2,9 @@
2
2
  :rww:
3
3
  :title: "Cartoon: Apple Tablet: Now With Barometer and Bird Call Generator"
4
4
  :feed: http://www.readwriteweb.com/rss.xml
5
+ :feeds:
6
+ - http://www.readwriteweb.com/rss.xml
7
+ - http://www.readwriteweb.com/archives/2010/01/cartoon_apple_tablet_now_with_barometer_and_bird_c.xml
5
8
  :briancray:
6
9
  :title: 5 great examples of popular blog posts that you should know
7
10
  :feed: http://feeds.feedburner.com/briancray/blog
@@ -9,23 +12,37 @@
9
12
  :huffington:
10
13
  :title: Afghans Losing Hope After 8 Years Of War
11
14
  :author: TODD PITMAN
15
+ :authors:
16
+ - TODD PITMAN
17
+ - AP
12
18
  :feed: http://feeds.huffingtonpost.com/huffingtonpost/raw_feed
13
19
  :lede: "KABUL - The man on the motorcycle was going the wrong way down a one-way street, gesturing indignantly for the phalanx of traffic-clogged cars in front of him to move."
14
20
  :bbcnews:
15
21
  :title: Gay Muslims made homeless by family violence
22
+ :titles:
23
+ - Gay Muslims made homeless by family violence
16
24
  :author: Poonam Taneja
25
+ :authors:
26
+ - Poonam Taneja
17
27
  :description: A charity is dealing with more gay Muslims made homeless after fleeing forced marriages and so-called "honour" violence.
18
28
  :lede: A UK charity is dealing with an increasing number of young gay Muslims becoming homeless after fleeing forced marriages and so-called honour violence.
19
29
  :feed: http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/england/rss.xml
20
30
  :factor:
21
31
  :title: Factor's bootstrap process explained
22
32
  :lede: "Separation of concerns between Factor VM and library codeThe Factor VM implements an abstract machine consisting of a data heap of objects, a code heap of machine code blocks, and a set of stacks. The VM loads an image file on startup, which becomes the data and code heap. "
33
+ :ledes:
34
+ - "Separation of concerns between Factor VM and library codeThe Factor VM implements an abstract machine consisting of a data heap of objects, a code heap of machine code blocks, and a set of stacks. The VM loads an image file on startup, which becomes the data and code heap. "
35
+ - Slava Pestov's weblog, primarily about Factor.
23
36
  :youtube:
24
37
  :title: YMO - Rydeen (Official Video)
25
38
  :author: ymo1965
39
+ :authors:
40
+ - ymo1965
26
41
  :spolsky:
27
42
  :title: The Absolute Minimum Every Software Developer Absolutely, Positively Must Know About Unicode and Character Sets (No Excuses!)
28
43
  :description: Haven't mastered the basics of Unicode and character sets? Please don't write another line of code until you've read this article.
44
+ :ledes:
45
+ - Ever wonder about that mysterious Content-Type tag? You know, the one you're supposed to put in HTML and you never quite know what it should be?
29
46
  :author: Joel Spolsky
30
47
  :favicon: /favicon.ico
31
48
  :feed: http://www.joelonsoftware.com/rss.xml
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pismo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Cooper
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-05-03 00:00:00 +01:00
12
+ date: 2010-05-15 00:00:00 +01:00
13
13
  default_executable: pismo
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency