boilerpipe 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.textile ADDED
@@ -0,0 +1,21 @@
1
+ This gem is a Ruby wrapper for the Boilerpipe API.
2
+ Boilerpipe definition:
3
+
4
+ bq. The boilerpipe library provides algorithms to detect and remove the surplus "clutter" (boilerplate, templates) around the main textual content of a web page.
5
+
6
+ For more information: http://code.google.com/p/boilerpipe/
7
+
8
+ h1. Usage
9
+
10
+ The Boilerpipe module exposes a single method, extract. It takes two parameters: the URL of the page to process and an optional hash of options.
11
+ The hash accepts three options:
12
+ * output => :html, :htmlFragment, :text, :json, :debug
13
+ * extractor => :ArticleExtractor, :DefaultExtractor, :LargestContentExtractor, :KeepEverythingExtractor, :CanolaExtractor
14
+ * api => The URL of the Boilerpipe API endpoint (defaults to http://boilerpipe-web.appspot.com/extract)
15
+
16
+ None of these options is mandatory. To find out more about them, check out the Boilerpipe API: http://boilerpipe-web.appspot.com/
17
+
18
+ h1. Example
19
+
20
+ >> require "boilerpipe"
21
+ >> Boilerpipe.extract("http://techcrunch.com/2011/05/12/karma-is-a-bitch/", {:output => :json})
data/Rakefile ADDED
File without changes
@@ -0,0 +1,20 @@
1
# Gem specification for boilerpipe, a Ruby wrapper of the Boilerpipe API.
Gem::Specification.new do |s|
  s.name        = "boilerpipe"
  s.version     = "0.0.1"
  s.date        = "2010-05-13"
  s.summary     = "Ruby wrapper of the Boilerpipe API"
  s.description = "Ruby wrapper of the Boilerpipe API"
  s.authors     = ["Grégory Marcilhacy"]
  s.email       = "g.marcilhacy@gmail.com"
  s.homepage    = "https://github.com/gregorym/boilerpipe"

  # `s.has_rdoc = false` was dropped on purpose: RubyGems deprecated the
  # setter and ignores the value (the published metadata for this release
  # records has_rdoc as true), so keeping it only produces a deprecation
  # warning on current RubyGems.

  s.require_paths = %w[lib]

  # Explicit manifest of the files shipped in the gem.
  s.files = %w[
    boilerpipe.gemspec
    README.textile
    Rakefile
    lib/boilerpipe.rb
  ]
end
data/lib/boilerpipe.rb ADDED
@@ -0,0 +1,26 @@
1
+ require 'open-uri'
2
+
3
# Adds minimal blank?/present? predicates to every object.
class Object
  # Returns the result of #empty? when the receiver responds to it;
  # otherwise returns true only for falsy receivers (nil and false).
  def blank?
    return !self unless respond_to?(:empty?)

    empty?
  end

  # Logical negation of #blank?.
  def present?
    !blank?
  end
end
12
+
13
# Thin client for the Boilerpipe web API.
module Boilerpipe
  # Endpoint queried when no :api option is given.
  DEFAULT_API_URL = 'http://boilerpipe-web.appspot.com/extract'
  # Known extractor names; the first entry is used when no :extractor option is given.
  EXTRACTORS = [:ArticleExtractor, :DefaultExtractor, :LargestContentExtractor, :KeepEverythingExtractor, :CanolaExtractor].freeze
  # Known output formats; the first entry is used when no :output option is given.
  OUTPUT_FORMATS = [:html, :htmlFragment, :text, :json, :debug].freeze

  # Requests an extraction of +extract_url+ from the API and returns the
  # response body.
  #
  # extract_url - URL of the page to process.
  # opts        - Hash of optional settings; nil, false or empty values fall
  #               back to the defaults:
  #               :output    - output format (default: OUTPUT_FORMATS.first)
  #               :extractor - extractor name (default: EXTRACTORS.first)
  #               :api       - API endpoint URL (default: DEFAULT_API_URL)
  #
  # Returns whatever the endpoint's #read returns (the response body).
  # Raises ArgumentError when :api is not a URL that open-uri can fetch.
  # Raises URI::InvalidURIError when the resulting URL cannot be parsed.
  def self.extract(extract_url, opts = {})
    api       = option_or_default(opts[:api], DEFAULT_API_URL)
    extractor = option_or_default(opts[:extractor], EXTRACTORS.first)
    output    = option_or_default(opts[:output], OUTPUT_FORMATS.first)

    # Percent-encode every value so that "?", "&" or "=" inside the target
    # URL cannot corrupt the API query string.
    query = URI.encode_www_form([
      ['url', extract_url.to_s],
      ['extractor', extractor.to_s],
      ['output', output.to_s]
    ])
    endpoint = URI.parse("#{api}?#{query}")

    # Only URI classes extended by open-uri respond to #read. Going through
    # the URI object instead of Kernel#open means a local path or a "|cmd"
    # string passed as :api is rejected rather than opened or executed.
    unless endpoint.respond_to?(:read)
      raise ArgumentError, "unsupported API URL: #{api}"
    end

    endpoint.read
  end

  # Returns +value+ unless it is nil, false or empty, in which case
  # +default+ is returned.
  def self.option_or_default(value, default)
    return default unless value
    return default if value.respond_to?(:empty?) && value.empty?

    value
  end
  private_class_method :option_or_default
end
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: boilerpipe
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - "Gr\xC3\xA9gory Marcilhacy"
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-05-13 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Ruby wrapper of the Boilerpipe API
23
+ email: g.marcilhacy@gmail.com
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - boilerpipe.gemspec
32
+ - README.textile
33
+ - Rakefile
34
+ - lib/boilerpipe.rb
35
+ has_rdoc: true
36
+ homepage: https://github.com/gregorym/boilerpipe
37
+ licenses: []
38
+
39
+ post_install_message:
40
+ rdoc_options: []
41
+
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ none: false
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ hash: 3
50
+ segments:
51
+ - 0
52
+ version: "0"
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ hash: 3
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ requirements: []
63
+
64
+ rubyforge_project:
65
+ rubygems_version: 1.6.2
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Ruby wrapper of the Boilerpipe API
69
+ test_files: []
70
+