jruby-boilerpipe 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 7e5ce4c67c60120978ee89b3f1f12fd78004ea7d
4
- data.tar.gz: 4f0e3c53f036054f2063a72ff5cfc704c3d8d15b
2
+ SHA256:
3
+ metadata.gz: 8a36efcb938933662d44ad33cd1887e99c34edf1376d36fc383a5a0424b4b797
4
+ data.tar.gz: ccdab7fda06435dfc91effb80f1edae0d810c6ef44b728a3e60710275a08d3da
5
5
  SHA512:
6
- metadata.gz: cfd82a4cdf0feec6f052694ba1fb0bdf63aa80973c48fee3a22f74d9aff1c8c39c2fc75310cb255fc2f6d2c0166571323e37382ec782e8b2ec38c2fb8ecd40be
7
- data.tar.gz: 02f2d69c4c36055e3728fc23bba42da998a6891496e442905d75d8f0c818238f9eb1f469cd3442f4dd4c18e0fe26087f70d1e4b6c545323bf8a20654ddb153a2
6
+ metadata.gz: cda5bbaf4fb99bfa863bcc718d66145481c23c7ded8ccfc57b6a8e6a7280d9e89f67c00c388719f3652727c027158dc19f92f84646bfdc89320f1f05f3e5f74c
7
+ data.tar.gz: e5bfef4c89ebbe6ba8658fcb653d6605f9a6253eaaa14ff9f13b23840a273c6ba4f495c6099d33852d4c68d655c9da2324606ed4c349412abafda3292f4e8608
@@ -0,0 +1,23 @@
1
+ module Boilerpipe
2
+ java_import 'com.kohlschutter.boilerpipe.extractors.ArticleExtractor'
3
+ java_import 'com.kohlschutter.boilerpipe.util.UnicodeTokenizer'
4
+ java_import java.net.URL
5
+
6
+ class ArticleExtractor
7
+ def self.get_text(s)
8
+ url = nil
9
+
10
+ begin
11
+ url = Java::JavaNet::URL.new(s)
12
+ rescue Java::JavaNet::MalformedURLException => e
13
+ # not a URL
14
+ end
15
+ input = url ? url : s
16
+ ArticleExtractor::INSTANCE.get_text(input)
17
+ end
18
+
19
+ class <<self
20
+ alias_method :text, :get_text
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,17 @@
1
+ module Boilerpipe
2
+ module SAX
3
+ java_import 'com.kohlschutter.boilerpipe.sax.BoilerpipeHTMLParser'
4
+ java_import 'org.xml.sax.InputSource'
5
+ java_import java.io.StringReader
6
+
7
+ class BoilerpipeHTMLParser
8
+ def self.parse(text)
9
+ parser = BoilerpipeHTMLParser.new
10
+ string_reader = StringReader.new(text)
11
+ is = InputSource.new(string_reader)
12
+ parser.parse(is)
13
+ parser.to_text_document
14
+ end
15
+ end
16
+ end
17
+ end
@@ -1,3 +1,3 @@
1
1
  module Boilerpipe
2
- VERSION = '0.0.2'
2
+ VERSION = '0.0.3'
3
3
  end
data/lib/boilerpipe.rb CHANGED
@@ -1,26 +1,4 @@
1
- require 'boilerpipe/version'
2
1
  require_relative 'boilerpipe-common-2.0-SNAPSHOT-jar-with-dependencies.jar'
3
-
4
- module Boilerpipe
5
- java_import 'com.kohlschutter.boilerpipe.extractors.ArticleExtractor'
6
- java_import 'com.kohlschutter.boilerpipe.util.UnicodeTokenizer'
7
- java_import java.net.URL
8
-
9
- class ArticleExtractor
10
- def self.get_text(s)
11
- url = nil
12
-
13
- begin
14
- url = Java::JavaNet::URL.new(s)
15
- rescue Java::JavaNet::MalformedURLException => e
16
- # not a URL
17
- end
18
- input = url ? url : s
19
- ArticleExtractor::INSTANCE.get_text(input)
20
- end
21
-
22
- class <<self
23
- alias_method :text, :get_text
24
- end
25
- end
26
- end
2
+ require 'boilerpipe/version'
3
+ require 'boilerpipe/sax/boilerpipe_html_parser'
4
+ require 'boilerpipe/extractors/article_extractor'
metadata CHANGED
@@ -1,52 +1,52 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jruby-boilerpipe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregory Ostermayr
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-08-30 00:00:00.000000000 Z
11
+ date: 2017-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
14
  requirement: !ruby/object:Gem::Requirement
16
15
  requirements:
17
16
  - - "~>"
18
17
  - !ruby/object:Gem::Version
19
18
  version: '1.10'
20
- type: :development
19
+ name: bundler
21
20
  prerelease: false
21
+ type: :development
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.10'
27
27
  - !ruby/object:Gem::Dependency
28
- name: rake
29
28
  requirement: !ruby/object:Gem::Requirement
30
29
  requirements:
31
30
  - - "~>"
32
31
  - !ruby/object:Gem::Version
33
32
  version: '10.0'
34
- type: :development
33
+ name: rake
35
34
  prerelease: false
35
+ type: :development
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rspec
43
42
  requirement: !ruby/object:Gem::Requirement
44
43
  requirements:
45
44
  - - ">="
46
45
  - !ruby/object:Gem::Version
47
46
  version: '0'
48
- type: :development
47
+ name: rspec
49
48
  prerelease: false
49
+ type: :development
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
@@ -69,11 +69,13 @@ files:
69
69
  - jruby-boilerpipe.gemspec
70
70
  - lib/boilerpipe-common-2.0-SNAPSHOT-jar-with-dependencies.jar
71
71
  - lib/boilerpipe.rb
72
+ - lib/boilerpipe/extractors/article_extractor.rb
73
+ - lib/boilerpipe/sax/boilerpipe_html_parser.rb
72
74
  - lib/boilerpipe/version.rb
73
75
  homepage: https://github.com/gregors/jruby-boilerpipe
74
76
  licenses: []
75
77
  metadata: {}
76
- post_install_message:
78
+ post_install_message:
77
79
  rdoc_options: []
78
80
  require_paths:
79
81
  - lib
@@ -88,9 +90,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
90
  - !ruby/object:Gem::Version
89
91
  version: '0'
90
92
  requirements: []
91
- rubyforge_project:
92
- rubygems_version: 2.4.5
93
- signing_key:
93
+ rubyforge_project:
94
+ rubygems_version: 2.6.11
95
+ signing_key:
94
96
  specification_version: 4
95
97
  summary: Ruby wrapper around boilerpipe java library
96
98
  test_files: []