chupa-text-decomposer-pdf 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/chupa-text-decomposer-pdf.gemspec +3 -2
- data/doc/text/news.md +6 -0
- data/lib/chupa-text/decomposers/pdf.rb +4 -1
- data/test/test-pdf.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e31d3498186c960a9dae07539e83f77ffa5b995
|
4
|
+
data.tar.gz: 6ecc6033cd1420eb5c7dba3540cf9b4bdca569e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 661698768712b75f81cccef8df76cdd86ea76ef95bb38320d0e713c2a2a0a68cc222c62c64219baad59b72788bcb5ff5e0fee0277ef2f74125c3a6e24d1a6ca6
|
7
|
+
data.tar.gz: 54d47d4bbd031c02f8774200177b29c5c112a574f812bc68ee43d20019a00e461e8ca4aad955f6c50913e28578656ffdaa12fdca33e04d603df32f9c79662c62
|
data/chupa-text-decomposer-pdf.gemspec
CHANGED
@@ -22,14 +22,15 @@ end
|
|
22
22
|
|
23
23
|
Gem::Specification.new do |spec|
|
24
24
|
spec.name = "chupa-text-decomposer-pdf"
|
25
|
-
spec.version = "1.0.0"
|
25
|
+
spec.version = "1.0.1"
|
26
26
|
spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-pdf"
|
27
27
|
spec.authors = ["Kouhei Sutou"]
|
28
28
|
spec.email = ["kou@clear-code.com"]
|
29
29
|
readme = File.read("README.md", :encoding => "UTF-8")
|
30
30
|
entries = readme.split(/^\#\#\s(.*)$/)
|
31
31
|
description = clean_white_space.call(entries[entries.index("Description") + 1])
|
32
|
-
spec.summary
|
32
|
+
spec.summary = description.split(/\n\n+/, 2).first
|
33
|
+
spec.description = description
|
33
34
|
spec.license = "LGPLv2.1 or later"
|
34
35
|
spec.files = ["#{spec.name}.gemspec"]
|
35
36
|
spec.files += ["README.md", "LICENSE.txt", "Rakefile", "Gemfile"]
|
data/doc/text/news.md
CHANGED
data/lib/chupa-text/decomposers/pdf.rb
CHANGED
@@ -32,7 +32,10 @@ module ChupaText
|
|
32
32
|
document = Poppler::Document.new(data.body)
|
33
33
|
text = ""
|
34
34
|
document.each do |page|
|
35
|
-
|
35
|
+
page_text = page.get_text
|
36
|
+
next if page_text.empty?
|
37
|
+
text << "\n" unless text.empty?
|
38
|
+
text << page_text
|
36
39
|
end
|
37
40
|
text_data = TextData.new(text)
|
38
41
|
text_data.uri = data.uri
|
data/test/test-pdf.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text-decomposer-pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: chupa-text
|
@@ -109,6 +109,9 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
description: |
|
112
|
+
This is a ChupaText decomposer plugin for to extract text and
|
113
|
+
meta-data from PDF.
|
114
|
+
|
112
115
|
You can use `pdf` decomposer.
|
113
116
|
email:
|
114
117
|
- kou@clear-code.com
|