rtika 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +9 -7
- data/Rakefile +0 -3
- data/VERSION +1 -1
- data/lib/rtika.rb +3 -3
- data/rtika.gemspec +2 -2
- metadata +3 -3
data/README.rdoc
CHANGED
@@ -10,13 +10,15 @@ Make sure you're on JRuby first.
|
|
10
10
|
require 'rtika'
|
11
11
|
|
12
12
|
result = RTika::FileParser.parse("mywordfile.doc")
|
13
|
-
puts result.content
|
14
|
-
puts result.title
|
15
|
-
puts result.author
|
16
|
-
|
17
|
-
result = RTika::StringParser.parse("<html
|
18
|
-
|
19
|
-
|
13
|
+
puts result.content # prints out the document's contents
|
14
|
+
puts result.title # fetches title from the doc's metadata
|
15
|
+
puts result.author # fetches author from the doc's metadata
|
16
|
+
|
17
|
+
result = RTika::StringParser.parse("<html>
|
18
|
+
<head><title>MYTITLE</title></head>
|
19
|
+
<body>this is my very ... long ... string</body></html>")
|
20
|
+
puts result.content # returns <body> contents
|
21
|
+
puts result.title # returns <title> contents
|
20
22
|
|
21
23
|
== Note on Patches/Pull Requests
|
22
24
|
|
data/Rakefile
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.2.0
|
data/lib/rtika.rb
CHANGED
@@ -61,7 +61,7 @@ module RTika
|
|
61
61
|
|
62
62
|
def process
|
63
63
|
input_stream = java.io.ByteArrayInputStream.new(@input_string.to_java.get_bytes)
|
64
|
-
content = RTika::BodyContentHandler.new
|
64
|
+
content = RTika::BodyContentHandler.new(-1)
|
65
65
|
metadata = RTika::Metadata.new
|
66
66
|
|
67
67
|
@parser.parse(input_stream, content, metadata)
|
@@ -78,7 +78,7 @@ module RTika
|
|
78
78
|
|
79
79
|
def process
|
80
80
|
input_stream = java.io.FileInputStream.new(java.io.File.new(@filename))
|
81
|
-
content = RTika::BodyContentHandler.new
|
81
|
+
content = RTika::BodyContentHandler.new(-1)
|
82
82
|
metadata = RTika::Metadata.new
|
83
83
|
metadata.set("filename", File.basename(@filename))
|
84
84
|
|
@@ -97,7 +97,7 @@ module RTika
|
|
97
97
|
|
98
98
|
def process
|
99
99
|
input_stream = java.io.ByteArrayInputStream.new(@content.to_java.get_bytes)
|
100
|
-
content = RTika::BodyContentHandler.new
|
100
|
+
content = RTika::BodyContentHandler.new(-1)
|
101
101
|
metadata = RTika::Metadata.new
|
102
102
|
metadata.set("filename", File.basename(@url))
|
103
103
|
|
data/rtika.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{rtika}
|
8
|
-
s.version = "0.1.0"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pradeep Elankumaran"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-11-03}
|
13
13
|
s.description = %q{rTika is a JRuby wrapper around the Apache Tika content extraction library}
|
14
14
|
s.email = %q{pradeepe@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 1
|
7
|
+
- 2
|
8
8
|
- 0
|
9
|
-
version: 0.1.0
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Pradeep Elankumaran
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-11-03 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|