jruby-boilerpipe 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 7e5ce4c67c60120978ee89b3f1f12fd78004ea7d
4
- data.tar.gz: 4f0e3c53f036054f2063a72ff5cfc704c3d8d15b
2
+ SHA256:
3
+ metadata.gz: 8a36efcb938933662d44ad33cd1887e99c34edf1376d36fc383a5a0424b4b797
4
+ data.tar.gz: ccdab7fda06435dfc91effb80f1edae0d810c6ef44b728a3e60710275a08d3da
5
5
  SHA512:
6
- metadata.gz: cfd82a4cdf0feec6f052694ba1fb0bdf63aa80973c48fee3a22f74d9aff1c8c39c2fc75310cb255fc2f6d2c0166571323e37382ec782e8b2ec38c2fb8ecd40be
7
- data.tar.gz: 02f2d69c4c36055e3728fc23bba42da998a6891496e442905d75d8f0c818238f9eb1f469cd3442f4dd4c18e0fe26087f70d1e4b6c545323bf8a20654ddb153a2
6
+ metadata.gz: cda5bbaf4fb99bfa863bcc718d66145481c23c7ded8ccfc57b6a8e6a7280d9e89f67c00c388719f3652727c027158dc19f92f84646bfdc89320f1f05f3e5f74c
7
+ data.tar.gz: e5bfef4c89ebbe6ba8658fcb653d6605f9a6253eaaa14ff9f13b23840a273c6ba4f495c6099d33852d4c68d655c9da2324606ed4c349412abafda3292f4e8608
data/lib/boilerpipe/extractors/article_extractor.rb ADDED
@@ -0,0 +1,23 @@
1
+ module Boilerpipe
2
+ java_import 'com.kohlschutter.boilerpipe.extractors.ArticleExtractor'
3
+ java_import 'com.kohlschutter.boilerpipe.util.UnicodeTokenizer'
4
+ java_import java.net.URL
5
+
6
+ class ArticleExtractor
7
+ def self.get_text(s)
8
+ url = nil
9
+
10
+ begin
11
+ url = Java::JavaNet::URL.new(s)
12
+ rescue Java::JavaNet::MalformedURLException => e
13
+ # not a URL
14
+ end
15
+ input = url ? url : s
16
+ ArticleExtractor::INSTANCE.get_text(input)
17
+ end
18
+
19
+ class <<self
20
+ alias_method :text, :get_text
21
+ end
22
+ end
23
+ end
data/lib/boilerpipe/sax/boilerpipe_html_parser.rb ADDED
@@ -0,0 +1,17 @@
1
+ module Boilerpipe
2
+ module SAX
3
+ java_import 'com.kohlschutter.boilerpipe.sax.BoilerpipeHTMLParser'
4
+ java_import 'org.xml.sax.InputSource'
5
+ java_import java.io.StringReader
6
+
7
+ class BoilerpipeHTMLParser
8
+ def self.parse(text)
9
+ parser = BoilerpipeHTMLParser.new
10
+ string_reader = StringReader.new(text)
11
+ is = InputSource.new(string_reader)
12
+ parser.parse(is)
13
+ parser.to_text_document
14
+ end
15
+ end
16
+ end
17
+ end
data/lib/boilerpipe/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Boilerpipe
2
- VERSION = '0.0.2'
2
+ VERSION = '0.0.3'
3
3
  end
data/lib/boilerpipe.rb CHANGED
@@ -1,26 +1,4 @@
1
- require 'boilerpipe/version'
2
1
  require_relative 'boilerpipe-common-2.0-SNAPSHOT-jar-with-dependencies.jar'
3
-
4
- module Boilerpipe
5
- java_import 'com.kohlschutter.boilerpipe.extractors.ArticleExtractor'
6
- java_import 'com.kohlschutter.boilerpipe.util.UnicodeTokenizer'
7
- java_import java.net.URL
8
-
9
- class ArticleExtractor
10
- def self.get_text(s)
11
- url = nil
12
-
13
- begin
14
- url = Java::JavaNet::URL.new(s)
15
- rescue Java::JavaNet::MalformedURLException => e
16
- # not a URL
17
- end
18
- input = url ? url : s
19
- ArticleExtractor::INSTANCE.get_text(input)
20
- end
21
-
22
- class <<self
23
- alias_method :text, :get_text
24
- end
25
- end
26
- end
2
+ require 'boilerpipe/version'
3
+ require 'boilerpipe/sax/boilerpipe_html_parser'
4
+ require 'boilerpipe/extractors/article_extractor'
metadata CHANGED
@@ -1,52 +1,52 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jruby-boilerpipe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregory Ostermayr
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-08-30 00:00:00.000000000 Z
11
+ date: 2017-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
14
  requirement: !ruby/object:Gem::Requirement
16
15
  requirements:
17
16
  - - "~>"
18
17
  - !ruby/object:Gem::Version
19
18
  version: '1.10'
20
- type: :development
19
+ name: bundler
21
20
  prerelease: false
21
+ type: :development
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.10'
27
27
  - !ruby/object:Gem::Dependency
28
- name: rake
29
28
  requirement: !ruby/object:Gem::Requirement
30
29
  requirements:
31
30
  - - "~>"
32
31
  - !ruby/object:Gem::Version
33
32
  version: '10.0'
34
- type: :development
33
+ name: rake
35
34
  prerelease: false
35
+ type: :development
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rspec
43
42
  requirement: !ruby/object:Gem::Requirement
44
43
  requirements:
45
44
  - - ">="
46
45
  - !ruby/object:Gem::Version
47
46
  version: '0'
48
- type: :development
47
+ name: rspec
49
48
  prerelease: false
49
+ type: :development
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
@@ -69,11 +69,13 @@ files:
69
69
  - jruby-boilerpipe.gemspec
70
70
  - lib/boilerpipe-common-2.0-SNAPSHOT-jar-with-dependencies.jar
71
71
  - lib/boilerpipe.rb
72
+ - lib/boilerpipe/extractors/article_extractor.rb
73
+ - lib/boilerpipe/sax/boilerpipe_html_parser.rb
72
74
  - lib/boilerpipe/version.rb
73
75
  homepage: https://github.com/gregors/jruby-boilerpipe
74
76
  licenses: []
75
77
  metadata: {}
76
- post_install_message:
78
+ post_install_message:
77
79
  rdoc_options: []
78
80
  require_paths:
79
81
  - lib
@@ -88,9 +90,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
90
  - !ruby/object:Gem::Version
89
91
  version: '0'
90
92
  requirements: []
91
- rubyforge_project:
92
- rubygems_version: 2.4.5
93
- signing_key:
93
+ rubyforge_project:
94
+ rubygems_version: 2.6.11
95
+ signing_key:
94
96
  specification_version: 4
95
97
  summary: Ruby wrapper around boilerpipe java library
96
98
  test_files: []