rtika 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +9 -7
- data/Rakefile +0 -3
- data/VERSION +1 -1
- data/lib/rtika.rb +3 -3
- data/rtika.gemspec +2 -2
- metadata +3 -3
data/README.rdoc
CHANGED
@@ -10,13 +10,15 @@ Make sure you're on JRuby first.
|
|
10
10
|
require 'rtika'
|
11
11
|
|
12
12
|
result = RTika::FileParser.parse("mywordfile.doc")
|
13
|
-
puts result.content
|
14
|
-
puts result.title
|
15
|
-
puts result.author
|
16
|
-
|
17
|
-
result = RTika::StringParser.parse("<html
|
18
|
-
|
19
|
-
|
13
|
+
puts result.content # prints out the document's contents
|
14
|
+
puts result.title # fetches title from the doc's metadata
|
15
|
+
puts result.author # fetches author from the doc's metadata
|
16
|
+
|
17
|
+
result = RTika::StringParser.parse("<html>
|
18
|
+
<head><title>MYTITLE</title></head>
|
19
|
+
<body>this is my very ... long ... string</body></html>")
|
20
|
+
puts result.content # returns <body> contents
|
21
|
+
puts result.title # returns <title> contents
|
20
22
|
|
21
23
|
== Note on Patches/Pull Requests
|
22
24
|
|
data/Rakefile
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.2.0
|
data/lib/rtika.rb
CHANGED
@@ -61,7 +61,7 @@ module RTika
|
|
61
61
|
|
62
62
|
def process
|
63
63
|
input_stream = java.io.ByteArrayInputStream.new(@input_string.to_java.get_bytes)
|
64
|
-
content = RTika::BodyContentHandler.new
|
64
|
+
content = RTika::BodyContentHandler.new(-1)
|
65
65
|
metadata = RTika::Metadata.new
|
66
66
|
|
67
67
|
@parser.parse(input_stream, content, metadata)
|
@@ -78,7 +78,7 @@ module RTika
|
|
78
78
|
|
79
79
|
def process
|
80
80
|
input_stream = java.io.FileInputStream.new(java.io.File.new(@filename))
|
81
|
-
content = RTika::BodyContentHandler.new
|
81
|
+
content = RTika::BodyContentHandler.new(-1)
|
82
82
|
metadata = RTika::Metadata.new
|
83
83
|
metadata.set("filename", File.basename(@filename))
|
84
84
|
|
@@ -97,7 +97,7 @@ module RTika
|
|
97
97
|
|
98
98
|
def process
|
99
99
|
input_stream = java.io.ByteArrayInputStream.new(@content.to_java.get_bytes)
|
100
|
-
content = RTika::BodyContentHandler.new
|
100
|
+
content = RTika::BodyContentHandler.new(-1)
|
101
101
|
metadata = RTika::Metadata.new
|
102
102
|
metadata.set("filename", File.basename(@url))
|
103
103
|
|
data/rtika.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{rtika}
|
8
|
-
s.version = "0.1.0"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pradeep Elankumaran"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-11-03}
|
13
13
|
s.description = %q{rTika is a JRuby wrapper around the Apache Tika content extraction library}
|
14
14
|
s.email = %q{pradeepe@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 1
|
7
|
+
- 2
|
8
8
|
- 0
|
9
|
-
version: 0.1.0
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Pradeep Elankumaran
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-11-03 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|