pismo 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/pismo.rb ADDED
@@ -0,0 +1,44 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+ require 'fast_stemmer'
4
+ require 'chronic'
5
+
6
+ $: << File.dirname(__FILE__)
7
+ require 'pismo/document'
8
+ require 'pismo/readability'
9
+
10
module Pismo
  # Sugar method to make creating document objects nicer.
  #
  # handle - forwarded unchanged as the first argument of Document.new
  # url    - forwarded unchanged as the second argument (defaults to nil)
  #
  # Returns the newly created Document.
  def self.document(handle, url = nil)
    Document.new(handle, url)
  end

  # Stateless helpers that operate on collections of nodes.
  class NFunctions
    # Pick the entries of +list+ whose 'href' value matches +expression+.
    #
    # list       - an enumerable whose elements respond to ['href']
    # expression - a String (used as regular-expression source) or a Regexp
    #
    # Returns the matching entries. Entries whose 'href' is nil never match.
    def self.match_href(list, expression)
      # Compile the pattern once; interpolating inside the block would
      # rebuild the same Regexp for every node.
      pattern = /#{expression}/
      list.find_all { |node| node['href'] =~ pattern }
    end
  end
end
22
+
23
+ # Add some sugar to Nokogiri
24
class Nokogiri::HTML::Document
  # Return the first node found by +search+, or nil when there is none or
  # when the lookup raises.
  def get_the(search)
    # The explicit receiver is kept because the parameter shares the
    # method's name.
    self.search(search).first
  rescue StandardError
    nil
  end

  # Try each query in order and return the text produced by the first one
  # that yields a value, or nil when none does.
  #
  # A String query is handed to #search and the stripped inner text of the
  # first hit is used. An Array query is a [selector, callable] pair: the
  # callable receives the first hit for the selector and its return value is
  # stripped. Queries of any other type are skipped. Any StandardError raised
  # while evaluating a query is swallowed and the next query is tried.
  #
  # In the returned text the byte sequences \342\200\231 (UTF-8 right single
  # quotation mark) and \342\200\224 (UTF-8 em dash) are replaced with an
  # apostrophe and a hyphen respectively.
  def match(*queries)
    queries.each do |query|
      text =
        begin
          case query
          when String
            search(query).first.inner_text.strip
          when Array
            query[1].call(search(query.first).first).strip
          end
        rescue StandardError
          nil
        end
      next unless text

      text.gsub!(/\342\200\231/, '\'')
      text.gsub!(/\342\200\224/, '-')
      return text
    end
    nil
  end
end
data/pismo.gemspec ADDED
@@ -0,0 +1,92 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for pismo 0.2.0. This file is generated output, so lasting
# changes belong in Jeweler::Tasks in the Rakefile.
Gem::Specification.new do |spec|
  spec.name = 'pismo'
  spec.version = '0.2.0'

  if spec.respond_to?(:required_rubygems_version=)
    spec.required_rubygems_version = Gem::Requirement.new('>= 0')
  end
  spec.authors = ['Peter Cooper']
  spec.date = '2010-03-26'
  spec.default_executable = 'pismo'
  spec.description = 'Pismo extracts and retrieves content-related metadata from HTML pages - you can use the resulting data in an organized way, such as a summary/first paragraph, del.icio.us tags, first image used in the content block, etc.'
  spec.email = 'git@peterc.org'
  spec.executables = ['pismo']
  spec.extra_rdoc_files = %w[LICENSE README.rdoc]
  spec.files = %w[
    .document
    .gitignore
    LICENSE
    README.rdoc
    Rakefile
    VERSION
    bin/pismo
    lib/pismo.rb
    lib/pismo/document.rb
    lib/pismo/external_attributes.rb
    lib/pismo/internal_attributes.rb
    lib/pismo/readability.rb
    lib/pismo/stopwords.txt
    pismo.gemspec
    test/corpus/bbcnews.html
    test/corpus/briancray.html
    test/corpus/cant_read.html
    test/corpus/factor.html
    test/corpus/huffington.html
    test/corpus/metadata_expected.yaml
    test/corpus/rubyinside.html
    test/corpus/rww.html
    test/corpus/spolsky.html
    test/corpus/techcrunch.html
    test/corpus/youtube.html
    test/helper.rb
    test/test_corpus.rb
    test/test_pismo_document.rb
    test/test_readability.rb
  ]
  spec.homepage = 'http://github.com/peterc/pismo'
  spec.rdoc_options = ['--charset=UTF-8']
  spec.require_paths = ['lib']
  spec.rubygems_version = '1.3.5'
  spec.summary = 'Extracts or retrieves content-related metadata from HTML pages'
  spec.test_files = %w[
    test/helper.rb
    test/test_corpus.rb
    test/test_pismo_document.rb
    test/test_readability.rb
  ]

  # Gems declared as runtime dependencies, in declaration order.
  runtime_gems = %w[nokogiri loofah httparty fast-stemmer chronic]

  # Typed (development vs. runtime) dependencies are only declared when the
  # spec exposes specification_version and RubyGems is at least 1.2.0.
  typed_dependencies = false
  if spec.respond_to?(:specification_version)
    # Read as in the generated output; the value itself is not used.
    _current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
    spec.specification_version = 3
    typed_dependencies = Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0')
  end

  if typed_dependencies
    spec.add_development_dependency('shoulda', ['>= 0'])
    runtime_gems.each { |gem_name| spec.add_runtime_dependency(gem_name, ['>= 0']) }
  else
    # Without typed dependencies every gem, shoulda included, is added as a
    # plain dependency.
    (['shoulda'] + runtime_gems).each { |gem_name| spec.add_dependency(gem_name, ['>= 0']) }
  end
end
92
+