yomu 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +4 -4
- data/jar/{tika-app-1.1.jar → tika-app-1.2.jar} +0 -0
- data/lib/yomu.rb +27 -8
- data/lib/yomu/version.rb +2 -2
- metadata +4 -5
data/README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# Yomu 読む
|
2
|
-
[Yomu](http://erol.github.com/yomu) is a library for extracting text and metadata from files and documents using the [Apache
|
2
|
+
[Yomu](http://erol.github.com/yomu) is a library for extracting text and metadata from files and documents using the [Apache Tika](http://tika.apache.org/) content analysis toolkit.
|
3
3
|
|
4
4
|
Here are some of the formats supported:
|
5
5
|
|
@@ -10,7 +10,7 @@ Here are some of the formats supported:
|
|
10
10
|
- Rich Text Format (.rtf)
|
11
11
|
- Portable Document Format (.pdf)
|
12
12
|
|
13
|
-
For the complete list of supported formats, please visit the Apache
|
13
|
+
For the complete list of supported formats, please visit the Apache Tika
|
14
14
|
[Supported Document Formats](http://tika.apache.org/0.9/formats.html) page.
|
15
15
|
|
16
16
|
## Installation and Dependencies
|
@@ -27,7 +27,7 @@ Or install it yourself as:
|
|
27
27
|
|
28
28
|
$ gem install yomu
|
29
29
|
|
30
|
-
**Yomu packages the Apache
|
30
|
+
**Yomu packages the Apache Tika application jar and requires a working JRE for it to work.**
|
31
31
|
|
32
32
|
## Usage
|
33
33
|
|
@@ -70,4 +70,4 @@ Yomu can also read from a stream or any object that responds to `read`, includin
|
|
70
70
|
3. Create tests and make them pass ( `rake test` )
|
71
71
|
4. Commit your changes ( `git commit -am 'Added some feature'` )
|
72
72
|
5. Push to the branch ( `git push origin my-new-feature` )
|
73
|
-
6. Create a new Pull Request
|
73
|
+
6. Create a new Pull Request
|
Binary file
|
data/lib/yomu.rb
CHANGED
@@ -5,7 +5,7 @@ require 'yaml'
|
|
5
5
|
|
6
6
|
class Yomu
|
7
7
|
GEMPATH = File.dirname(File.dirname(__FILE__))
|
8
|
-
JARPATH = File.join(Yomu::GEMPATH, 'jar', 'tika-app-1.1.jar')
|
8
|
+
JARPATH = File.join(Yomu::GEMPATH, 'jar', 'tika-app-1.2.jar')
|
9
9
|
|
10
10
|
# Read text or metadata from a data buffer.
|
11
11
|
#
|
@@ -30,7 +30,7 @@ class Yomu
|
|
30
30
|
type == :metadata ? YAML.load(result) : result
|
31
31
|
end
|
32
32
|
|
33
|
-
# Create a new instance of Yomu.
|
33
|
+
# Create a new instance of Yomu with a given document.
|
34
34
|
#
|
35
35
|
# Using a file path:
|
36
36
|
#
|
@@ -40,7 +40,7 @@ class Yomu
|
|
40
40
|
#
|
41
41
|
# Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
42
42
|
#
|
43
|
-
#
|
43
|
+
# From a stream or an object which responds to +read+
|
44
44
|
#
|
45
45
|
# Yomu.new File.open('sample.pages')
|
46
46
|
|
@@ -60,7 +60,7 @@ class Yomu
|
|
60
60
|
end
|
61
61
|
end
|
62
62
|
|
63
|
-
# Returns the text
|
63
|
+
# Returns the text content of the Yomu document.
|
64
64
|
#
|
65
65
|
# yomu = Yomu.new 'sample.pages'
|
66
66
|
# yomu.text
|
@@ -71,30 +71,49 @@ class Yomu
|
|
71
71
|
@text = Yomu.read :text, data
|
72
72
|
end
|
73
73
|
|
74
|
-
# Returns the metadata hash of
|
74
|
+
# Returns the metadata hash of the Yomu document.
|
75
75
|
#
|
76
76
|
# yomu = Yomu.new 'sample.pages'
|
77
77
|
# yomu.metadata['Content-Type']
|
78
|
-
|
78
|
+
|
79
79
|
def metadata
|
80
80
|
return @metadata if defined? @metadata
|
81
81
|
|
82
82
|
@metadata = Yomu.read :metadata, data
|
83
83
|
end
|
84
84
|
|
85
|
+
# Returns +true+ if the Yomu document was specified using a file path.
|
86
|
+
#
|
87
|
+
# yomu = Yomu.new 'sample.pages'
|
88
|
+
# yomu.path? #=> true
|
89
|
+
|
85
90
|
def path?
|
86
91
|
defined? @path
|
87
92
|
end
|
88
93
|
|
94
|
+
# Returns +true+ if the Yomu document was specified using a URI.
|
95
|
+
#
|
96
|
+
# yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
97
|
+
# yomu.uri? #=> true
|
98
|
+
|
89
99
|
def uri?
|
90
100
|
defined? @uri
|
91
101
|
end
|
92
102
|
|
103
|
+
# Returns +true+ if the Yomu document was specified from a stream or an object which responds to +read+.
|
104
|
+
#
|
105
|
+
# file = File.open('sample.pages')
|
106
|
+
# yomu = Yomu.new file
|
107
|
+
# yomu.stream? #=> true
|
108
|
+
|
93
109
|
def stream?
|
94
110
|
defined? @stream
|
95
111
|
end
|
96
112
|
|
97
|
-
|
113
|
+
# Returns the raw/unparsed content of the Yomu document.
|
114
|
+
#
|
115
|
+
# yomu = Yomu.new 'sample.pages'
|
116
|
+
# yomu.data
|
98
117
|
|
99
118
|
def data
|
100
119
|
return @data if defined? @data
|
@@ -109,4 +128,4 @@ class Yomu
|
|
109
128
|
|
110
129
|
@data
|
111
130
|
end
|
112
|
-
end
|
131
|
+
end
|
data/lib/yomu/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
class Yomu
|
2
|
-
VERSION = "0.1.1"
|
3
|
-
end
|
2
|
+
VERSION = "0.1.2"
|
3
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yomu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-09 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Yomu is a library for extracting text and metadata using the Apache TIKA
|
15
15
|
content analysis toolkit.
|
@@ -25,7 +25,7 @@ files:
|
|
25
25
|
- NOTICE.txt
|
26
26
|
- README.md
|
27
27
|
- Rakefile
|
28
|
-
- jar/tika-app-1.1.jar
|
28
|
+
- jar/tika-app-1.2.jar
|
29
29
|
- lib/yomu.rb
|
30
30
|
- lib/yomu/version.rb
|
31
31
|
- test/samples/sample filename with spaces.pages
|
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
53
|
version: '0'
|
54
54
|
requirements: []
|
55
55
|
rubyforge_project:
|
56
|
-
rubygems_version: 1.8.
|
56
|
+
rubygems_version: 1.8.24
|
57
57
|
signing_key:
|
58
58
|
specification_version: 3
|
59
59
|
summary: Yomu is a library for extracting text and metadata using the Apache TIKA
|
@@ -63,4 +63,3 @@ test_files:
|
|
63
63
|
- test/samples/sample.pages
|
64
64
|
- test/test_helper.rb
|
65
65
|
- test/yomu_test.rb
|
66
|
-
has_rdoc:
|