pismo 0.2.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/pismo.rb CHANGED
@@ -4,6 +4,7 @@ require 'open-uri'
4
4
  require 'nokogiri'
5
5
  require 'fast_stemmer'
6
6
  require 'chronic'
7
+ require 'tempfile'
7
8
 
8
9
  $: << File.dirname(__FILE__)
9
10
  require 'pismo/document'
@@ -28,8 +29,9 @@ class Nokogiri::HTML::Document
28
29
  self.search(search).first rescue nil
29
30
  end
30
31
 
31
- def match(*queries)
32
- queries.each do |query|
32
+ def match(queries = [], all = false)
33
+ r = [] if all
34
+ [*queries].each do |query|
33
35
  if query.is_a?(String)
34
36
  result = self.search(query).first.inner_text.strip rescue nil
35
37
  elsif query.is_a?(Array)
@@ -41,9 +43,13 @@ class Nokogiri::HTML::Document
41
43
  # result.gsub!(/\342\200\224/, '-')
42
44
  result.gsub!('’', '\'')
43
45
  result.gsub!('—', '-')
44
- return result
46
+ if all
47
+ r << result
48
+ else
49
+ return result
50
+ end
45
51
  end
46
52
  end
47
- return nil
53
+ all && !r.empty? ? r : nil
48
54
  end
49
55
  end
data/pismo.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{pismo}
8
- s.version = "0.2.3"
8
+ s.version = "0.4.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Peter Cooper"]
12
- s.date = %q{2010-05-03}
12
+ s.date = %q{2010-05-15}
13
13
  s.default_executable = %q{pismo}
14
14
  s.description = %q{Pismo extracts and retrieves content-related metadata from HTML pages - you can use the resulting data in an organized way, such as a summary/first paragraph, body text, keywords, RSS feed URL, favicon, etc.}
15
15
  s.email = %q{git@peterc.org}
@@ -2,6 +2,9 @@
2
2
  :rww:
3
3
  :title: "Cartoon: Apple Tablet: Now With Barometer and Bird Call Generator"
4
4
  :feed: http://www.readwriteweb.com/rss.xml
5
+ :feeds:
6
+ - http://www.readwriteweb.com/rss.xml
7
+ - http://www.readwriteweb.com/archives/2010/01/cartoon_apple_tablet_now_with_barometer_and_bird_c.xml
5
8
  :briancray:
6
9
  :title: 5 great examples of popular blog posts that you should know
7
10
  :feed: http://feeds.feedburner.com/briancray/blog
@@ -9,23 +12,37 @@
9
12
  :huffington:
10
13
  :title: Afghans Losing Hope After 8 Years Of War
11
14
  :author: TODD PITMAN
15
+ :authors:
16
+ - TODD PITMAN
17
+ - AP
12
18
  :feed: http://feeds.huffingtonpost.com/huffingtonpost/raw_feed
13
19
  :lede: "KABUL - The man on the motorcycle was going the wrong way down a one-way street, gesturing indignantly for the phalanx of traffic-clogged cars in front of him to move."
14
20
  :bbcnews:
15
21
  :title: Gay Muslims made homeless by family violence
22
+ :titles:
23
+ - Gay Muslims made homeless by family violence
16
24
  :author: Poonam Taneja
25
+ :authors:
26
+ - Poonam Taneja
17
27
  :description: A charity is dealing with more gay Muslims made homeless after fleeing forced marriages and so-called "honour" violence.
18
28
  :lede: A UK charity is dealing with an increasing number of young gay Muslims becoming homeless after fleeing forced marriages and so-called honour violence.
19
29
  :feed: http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/england/rss.xml
20
30
  :factor:
21
31
  :title: Factor's bootstrap process explained
22
32
  :lede: "Separation of concerns between Factor VM and library codeThe Factor VM implements an abstract machine consisting of a data heap of objects, a code heap of machine code blocks, and a set of stacks. The VM loads an image file on startup, which becomes the data and code heap. "
33
+ :ledes:
34
+ - "Separation of concerns between Factor VM and library codeThe Factor VM implements an abstract machine consisting of a data heap of objects, a code heap of machine code blocks, and a set of stacks. The VM loads an image file on startup, which becomes the data and code heap. "
35
+ - Slava Pestov's weblog, primarily about Factor.
23
36
  :youtube:
24
37
  :title: YMO - Rydeen (Official Video)
25
38
  :author: ymo1965
39
+ :authors:
40
+ - ymo1965
26
41
  :spolsky:
27
42
  :title: The Absolute Minimum Every Software Developer Absolutely, Positively Must Know About Unicode and Character Sets (No Excuses!)
28
43
  :description: Haven't mastered the basics of Unicode and character sets? Please don't write another line of code until you've read this article.
44
+ :ledes:
45
+ - Ever wonder about that mysterious Content-Type tag? You know, the one you're supposed to put in HTML and you never quite know what it should be?
29
46
  :author: Joel Spolsky
30
47
  :favicon: /favicon.ico
31
48
  :feed: http://www.joelonsoftware.com/rss.xml
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pismo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Cooper
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-05-03 00:00:00 +01:00
12
+ date: 2010-05-15 00:00:00 +01:00
13
13
  default_executable: pismo
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency