yomu 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +4 -4
- data/jar/{tika-app-1.1.jar → tika-app-1.2.jar} +0 -0
- data/lib/yomu.rb +27 -8
- data/lib/yomu/version.rb +2 -2
- metadata +4 -5
data/README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# Yomu 読む
|
2
|
-
[Yomu](http://erol.github.com/yomu) is a library for extracting text and metadata from files and documents using the [Apache
|
2
|
+
[Yomu](http://erol.github.com/yomu) is a library for extracting text and metadata from files and documents using the [Apache Tika](http://tika.apache.org/) content analysis toolkit.
|
3
3
|
|
4
4
|
Here are some of the formats supported:
|
5
5
|
|
@@ -10,7 +10,7 @@ Here are some of the formats supported:
|
|
10
10
|
- Rich Text Format (.rtf)
|
11
11
|
- Portable Document Format (.pdf)
|
12
12
|
|
13
|
-
For the complete list of supported formats, please visit the Apache
|
13
|
+
For the complete list of supported formats, please visit the Apache Tika
|
14
14
|
[Supported Document Formats](http://tika.apache.org/0.9/formats.html) page.
|
15
15
|
|
16
16
|
## Installation and Dependencies
|
@@ -27,7 +27,7 @@ Or install it yourself as:
|
|
27
27
|
|
28
28
|
$ gem install yomu
|
29
29
|
|
30
|
-
**Yomu packages the Apache
|
30
|
+
**Yomu packages the Apache Tika application jar and requires a working JRE for it to work.**
|
31
31
|
|
32
32
|
## Usage
|
33
33
|
|
@@ -70,4 +70,4 @@ Yomu can also read from a stream or any object that responds to `read`, includin
|
|
70
70
|
3. Create tests and make them pass ( `rake test` )
|
71
71
|
4. Commit your changes ( `git commit -am 'Added some feature'` )
|
72
72
|
5. Push to the branch ( `git push origin my-new-feature` )
|
73
|
-
6. Create a new Pull Request
|
73
|
+
6. Create a new Pull Request
|
Binary file
|
data/lib/yomu.rb
CHANGED
@@ -5,7 +5,7 @@ require 'yaml'
|
|
5
5
|
|
6
6
|
class Yomu
|
7
7
|
GEMPATH = File.dirname(File.dirname(__FILE__))
|
8
|
-
JARPATH = File.join(Yomu::GEMPATH, 'jar', 'tika-app-1.
|
8
|
+
JARPATH = File.join(Yomu::GEMPATH, 'jar', 'tika-app-1.2.jar')
|
9
9
|
|
10
10
|
# Read text or metadata from a data buffer.
|
11
11
|
#
|
@@ -30,7 +30,7 @@ class Yomu
|
|
30
30
|
type == :metadata ? YAML.load(result) : result
|
31
31
|
end
|
32
32
|
|
33
|
-
# Create a new instance of Yomu.
|
33
|
+
# Create a new instance of Yomu with a given document.
|
34
34
|
#
|
35
35
|
# Using a file path:
|
36
36
|
#
|
@@ -40,7 +40,7 @@ class Yomu
|
|
40
40
|
#
|
41
41
|
# Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
42
42
|
#
|
43
|
-
#
|
43
|
+
# From a stream or an object which responds to +read+
|
44
44
|
#
|
45
45
|
# Yomu.new File.open('sample.pages')
|
46
46
|
|
@@ -60,7 +60,7 @@ class Yomu
|
|
60
60
|
end
|
61
61
|
end
|
62
62
|
|
63
|
-
# Returns the text
|
63
|
+
# Returns the text content of the Yomu document.
|
64
64
|
#
|
65
65
|
# yomu = Yomu.new 'sample.pages'
|
66
66
|
# yomu.text
|
@@ -71,30 +71,49 @@ class Yomu
|
|
71
71
|
@text = Yomu.read :text, data
|
72
72
|
end
|
73
73
|
|
74
|
-
# Returns the metadata hash of
|
74
|
+
# Returns the metadata hash of the Yomu document.
|
75
75
|
#
|
76
76
|
# yomu = Yomu.new 'sample.pages'
|
77
77
|
# yomu.metadata['Content-Type']
|
78
|
-
|
78
|
+
|
79
79
|
def metadata
|
80
80
|
return @metadata if defined? @metadata
|
81
81
|
|
82
82
|
@metadata = Yomu.read :metadata, data
|
83
83
|
end
|
84
84
|
|
85
|
+
# Returns +true+ if the Yomu document was specified using a file path.
|
86
|
+
#
|
87
|
+
# yomu = Yomu.new 'sample.pages'
|
88
|
+
# yomu.path? #=> true
|
89
|
+
|
85
90
|
def path?
|
86
91
|
defined? @path
|
87
92
|
end
|
88
93
|
|
94
|
+
# Returns +true+ if the Yomu document was specified using a URI.
|
95
|
+
#
|
96
|
+
# yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
97
|
+
# yomu.uri? #=> true
|
98
|
+
|
89
99
|
def uri?
|
90
100
|
defined? @uri
|
91
101
|
end
|
92
102
|
|
103
|
+
# Returns +true+ if the Yomu document was specified from a stream or an object which responds to +read+.
|
104
|
+
#
|
105
|
+
# file = File.open('sample.pages')
|
106
|
+
# yomu = Yomu.new file
|
107
|
+
# yomu.stream? #=> true
|
108
|
+
|
93
109
|
def stream?
|
94
110
|
defined? @stream
|
95
111
|
end
|
96
112
|
|
97
|
-
|
113
|
+
# Returns the raw/unparsed content of the Yomu document.
|
114
|
+
#
|
115
|
+
# yomu = Yomu.new 'sample.pages'
|
116
|
+
# yomu.data
|
98
117
|
|
99
118
|
def data
|
100
119
|
return @data if defined? @data
|
@@ -109,4 +128,4 @@ class Yomu
|
|
109
128
|
|
110
129
|
@data
|
111
130
|
end
|
112
|
-
end
|
131
|
+
end
|
data/lib/yomu/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
class Yomu
|
2
|
-
VERSION = "0.1.
|
3
|
-
end
|
2
|
+
VERSION = "0.1.2"
|
3
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yomu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-09 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Yomu is a library for extracting text and metadata using the Apache TIKA
|
15
15
|
content analysis toolkit.
|
@@ -25,7 +25,7 @@ files:
|
|
25
25
|
- NOTICE.txt
|
26
26
|
- README.md
|
27
27
|
- Rakefile
|
28
|
-
- jar/tika-app-1.
|
28
|
+
- jar/tika-app-1.2.jar
|
29
29
|
- lib/yomu.rb
|
30
30
|
- lib/yomu/version.rb
|
31
31
|
- test/samples/sample filename with spaces.pages
|
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
53
|
version: '0'
|
54
54
|
requirements: []
|
55
55
|
rubyforge_project:
|
56
|
-
rubygems_version: 1.8.
|
56
|
+
rubygems_version: 1.8.24
|
57
57
|
signing_key:
|
58
58
|
specification_version: 3
|
59
59
|
summary: Yomu is a library for extracting text and metadata using the Apache TIKA
|
@@ -63,4 +63,3 @@ test_files:
|
|
63
63
|
- test/samples/sample.pages
|
64
64
|
- test/test_helper.rb
|
65
65
|
- test/yomu_test.rb
|
66
|
-
has_rdoc:
|