rtika 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6)
  1. data/README.rdoc +9 -7
  2. data/Rakefile +0 -3
  3. data/VERSION +1 -1
  4. data/lib/rtika.rb +3 -3
  5. data/rtika.gemspec +2 -2
  6. metadata +3 -3
data/README.rdoc CHANGED
@@ -10,13 +10,15 @@ Make sure you're on JRuby first.
10
10
  require 'rtika'
11
11
 
12
12
  result = RTika::FileParser.parse("mywordfile.doc")
13
- puts result.content
14
- puts result.title
15
- puts result.author
16
-
17
- result = RTika::StringParser.parse("<html><body>this is my very ... long ... string</body></html>")
18
- puts result.content
19
- puts result.title
13
+ puts result.content # prints out the document's contents
14
+ puts result.title # fetches title from the doc's metadata
15
+ puts result.author # fetches author from the doc's metadata
16
+
17
+ result = RTika::StringParser.parse("<html>
18
+ <head><title>MYTITLE</title></head>
19
+ <body>this is my very ... long ... string</body></html>")
20
+ puts result.content # returns <body> contents
21
+ puts result.title # returns <title> contents
20
22
 
21
23
  == Note on Patches/Pull Requests
22
24
 
data/Rakefile CHANGED
@@ -50,6 +50,3 @@ Rake::RDocTask.new do |rdoc|
50
50
  rdoc.rdoc_files.include('README*')
51
51
  rdoc.rdoc_files.include('lib/**/*.rb')
52
52
  end
53
-
54
- Jeweler::GemcutterTasks.new
55
-
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
data/lib/rtika.rb CHANGED
@@ -61,7 +61,7 @@ module RTika
61
61
 
62
62
  def process
63
63
  input_stream = java.io.ByteArrayInputStream.new(@input_string.to_java.get_bytes)
64
- content = RTika::BodyContentHandler.new
64
+ content = RTika::BodyContentHandler.new(-1)
65
65
  metadata = RTika::Metadata.new
66
66
 
67
67
  @parser.parse(input_stream, content, metadata)
@@ -78,7 +78,7 @@ module RTika
78
78
 
79
79
  def process
80
80
  input_stream = java.io.FileInputStream.new(java.io.File.new(@filename))
81
- content = RTika::BodyContentHandler.new
81
+ content = RTika::BodyContentHandler.new(-1)
82
82
  metadata = RTika::Metadata.new
83
83
  metadata.set("filename", File.basename(@filename))
84
84
 
@@ -97,7 +97,7 @@ module RTika
97
97
 
98
98
  def process
99
99
  input_stream = java.io.ByteArrayInputStream.new(@content.to_java.get_bytes)
100
- content = RTika::BodyContentHandler.new
100
+ content = RTika::BodyContentHandler.new(-1)
101
101
  metadata = RTika::Metadata.new
102
102
  metadata.set("filename", File.basename(@url))
103
103
 
data/rtika.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{rtika}
8
- s.version = "0.1.0"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Pradeep Elankumaran"]
12
- s.date = %q{2010-10-28}
12
+ s.date = %q{2010-11-03}
13
13
  s.description = %q{rTika is a JRuby wrapper around the Apache Tika content extraction library}
14
14
  s.email = %q{pradeepe@gmail.com}
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 1
7
+ - 2
8
8
  - 0
9
- version: 0.1.0
9
+ version: 0.2.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Pradeep Elankumaran
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-10-28 00:00:00 -07:00
17
+ date: 2010-11-03 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies: []
20
20