yomu 0.1.9 → 0.1.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 23b6ed327cc94e45aea195294d84bb8dd669f1a6
4
+ data.tar.gz: c5ef3d9857242820b04f7c76f2f90834a7235a62
5
+ SHA512:
6
+ metadata.gz: 195f73de69ed781c97f10fffac2971a258a2f23ede81e6ebbc1ce34400c38de4fccac93b765df37c0ca3ab105a95a682977fcc6c71a3d86f810a674e2226ed0e
7
+ data.tar.gz: a5f562f9de92eedf29a2cf889f1083f03d19928f10998998131c06f3457613e40e06a7fd2f116a88bb74058e1bceb7fe62402aa6204e0fe1a81adcc71cf1c422
data/README.md CHANGED
@@ -1,4 +1,7 @@
1
+ ![Google Analytics](https://ga-beacon.appspot.com/UA-31066891-2/yomu/code)
2
+
1
3
  # Yomu 読む
4
+
2
5
  [Yomu](http://erol.github.com/yomu) is a library for extracting text and metadata from files and documents using the [Apache Tika](http://tika.apache.org/) content analysis toolkit.
3
6
 
4
7
  Here are some of the formats supported:
@@ -17,50 +20,62 @@ For the complete list of supported formats, please visit the Apache Tika
17
20
 
18
21
  Text, metadata and MIME type information can be extracted by calling `Yomu.read` directly:
19
22
 
20
- require 'yomu'
23
+ ```ruby
24
+ require 'yomu'
21
25
 
22
- data = File.read 'sample.pages'
23
- text = Yomu.read :text, data
24
- metadata = Yomu.read :metadata, data
25
- mimetype = Yomu.read :mimetype, data
26
+ data = File.read 'sample.pages'
27
+ text = Yomu.read :text, data
28
+ metadata = Yomu.read :metadata, data
29
+ mimetype = Yomu.read :mimetype, data
30
+ ```
26
31
 
27
32
  ### Reading text from a given filename
28
33
 
29
34
  Create a new instance of Yomu and pass a filename.
30
35
 
31
- yomu = Yomu.new 'sample.pages'
32
- text = yomu.text
36
+ ```ruby
37
+ yomu = Yomu.new 'sample.pages'
38
+ text = yomu.text
39
+ ```
33
40
 
34
41
  ### Reading text from a given URL
35
42
 
36
43
  This is useful for reading remote files, like documents hosted on Amazon S3.
37
44
 
38
- yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
39
- text = yomu.text
45
+ ```ruby
46
+ yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
47
+ text = yomu.text
48
+ ```
40
49
 
41
50
  ### Reading text from a stream
42
51
 
43
52
  Yomu can also read from a stream or any object that responds to `read`, including file uploads from Ruby on Rails or Sinatra.
44
53
 
45
- post '/:name/:filename' do
46
- yomu = Yomu.new params[:data][:tempfile]
47
- yomu.text
48
- end
54
+ ```ruby
55
+ post '/:name/:filename' do
56
+ yomu = Yomu.new params[:data][:tempfile]
57
+ yomu.text
58
+ end
59
+ ```
49
60
 
50
61
  ### Reading metadata
51
62
 
52
63
  Metadata is returned as a hash.
53
64
 
54
- yomu = Yomu.new 'sample.pages'
55
- yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
65
+ ```ruby
66
+ yomu = Yomu.new 'sample.pages'
67
+ yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
68
+ ```
56
69
 
57
70
  ### Reading MIME types
58
71
 
59
72
  MIME type is returned as a MIME::Type object.
60
73
 
61
- yomu = Yomu.new 'sample.docx'
62
- yomu.mimetype.content_type #=> "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
63
- yomu.mimetype.extensions #=> ['docx']
74
+ ```ruby
75
+ yomu = Yomu.new 'sample.docx'
76
+ yomu.mimetype.content_type #=> "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
77
+ yomu.mimetype.extensions #=> ['docx']
78
+ ```
64
79
 
65
80
  ## Installation and Dependencies
66
81
 
@@ -6,7 +6,7 @@ require 'yaml'
6
6
 
7
7
  class Yomu
8
8
  GEMPATH = File.dirname(File.dirname(__FILE__))
9
- JARPATH = File.join(Yomu::GEMPATH, 'jar', 'tika-app-1.*.jar')
9
+ JARPATH = File.join(Yomu::GEMPATH, 'jar', 'tika-app-1.5.jar')
10
10
 
11
11
  # Read text or metadata from a data buffer.
12
12
  #
@@ -18,6 +18,8 @@ class Yomu
18
18
  switch = case type
19
19
  when :text
20
20
  '-t'
21
+ when :html
22
+ '-h'
21
23
  when :metadata
22
24
  '-m'
23
25
  when :mimetype
@@ -33,6 +35,8 @@ class Yomu
33
35
  case type
34
36
  when :text
35
37
  result
38
+ when :html
39
+ result
36
40
  when :metadata
37
41
  YAML.load quote(result)
38
42
  when :mimetype
@@ -81,6 +85,17 @@ class Yomu
81
85
  @text = Yomu.read :text, data
82
86
  end
83
87
 
88
# Returns the content of the Yomu document rendered as HTML.
#
#   yomu = Yomu.new 'sample.pages'
#   yomu.html
#
# The result is cached in @html. It deliberately does not share the @text
# instance variable, which this class also assigns from the :text read;
# sharing one variable made whichever reader ran first decide what both
# readers returned.

def html
  return @html if defined? @html

  @html = Yomu.read :html, data
end
98
+
84
99
  # Returns the metadata hash of the Yomu document.
85
100
  #
86
101
  # yomu = Yomu.new 'sample.pages'
@@ -1,3 +1,3 @@
1
1
  class Yomu
2
- VERSION = "0.1.9"
2
+ VERSION = "0.1.10"
3
3
  end
metadata CHANGED
@@ -1,78 +1,69 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yomu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
5
- prerelease:
4
+ version: 0.1.10
6
5
  platform: ruby
7
6
  authors:
8
7
  - Erol Fornoles
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-07-17 00:00:00.000000000 Z
11
+ date: 2014-02-27 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: mime-types
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
19
  version: '1.23'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ~>
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
26
  version: '1.23'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: bundler
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ~>
31
+ - - "~>"
36
32
  - !ruby/object:Gem::Version
37
33
  version: '1.3'
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ~>
38
+ - - "~>"
44
39
  - !ruby/object:Gem::Version
45
40
  version: '1.3'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: rake
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ! '>='
45
+ - - ">="
52
46
  - !ruby/object:Gem::Version
53
47
  version: '0'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ! '>='
52
+ - - ">="
60
53
  - !ruby/object:Gem::Version
61
54
  version: '0'
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: rspec
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ~>
59
+ - - "~>"
68
60
  - !ruby/object:Gem::Version
69
61
  version: '2.14'
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ~>
66
+ - - "~>"
76
67
  - !ruby/object:Gem::Version
77
68
  version: '2.14'
78
69
  description: Read text and metadata from files and documents (.doc, .docx, .pages,
@@ -83,14 +74,14 @@ executables: []
83
74
  extensions: []
84
75
  extra_rdoc_files: []
85
76
  files:
86
- - .gitignore
87
- - .rspec
77
+ - ".gitignore"
78
+ - ".rspec"
88
79
  - Gemfile
89
80
  - LICENSE
90
81
  - NOTICE.txt
91
82
  - README.md
92
83
  - Rakefile
93
- - jar/tika-app-1.4.jar
84
+ - jar/tika-app-1.5.jar
94
85
  - lib/yomu.rb
95
86
  - lib/yomu/version.rb
96
87
  - spec/helper.rb
@@ -99,32 +90,30 @@ files:
99
90
  - spec/samples/sample.docx
100
91
  - spec/samples/sample.pages
101
92
  - spec/yomu_spec.rb
102
- - test/helper.rb
103
93
  - yomu.gemspec
104
94
  homepage: http://erol.github.com/yomu
105
95
  licenses:
106
96
  - MIT
97
+ metadata: {}
107
98
  post_install_message:
108
99
  rdoc_options: []
109
100
  require_paths:
110
101
  - lib
111
102
  required_ruby_version: !ruby/object:Gem::Requirement
112
- none: false
113
103
  requirements:
114
- - - ! '>='
104
+ - - ">="
115
105
  - !ruby/object:Gem::Version
116
106
  version: '0'
117
107
  required_rubygems_version: !ruby/object:Gem::Requirement
118
- none: false
119
108
  requirements:
120
- - - ! '>='
109
+ - - ">="
121
110
  - !ruby/object:Gem::Version
122
111
  version: '0'
123
112
  requirements: []
124
113
  rubyforge_project:
125
- rubygems_version: 1.8.23
114
+ rubygems_version: 2.2.0
126
115
  signing_key:
127
- specification_version: 3
116
+ specification_version: 4
128
117
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
129
118
  .rtf, .pdf)
130
119
  test_files:
@@ -134,4 +123,3 @@ test_files:
134
123
  - spec/samples/sample.docx
135
124
  - spec/samples/sample.pages
136
125
  - spec/yomu_spec.rb
137
- - test/helper.rb
@@ -1,3 +0,0 @@
1
- require 'bundler/setup'
2
- require 'yomu'
3
- require 'minitest/autorun'