chupa-text-decomposer-pdf 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +5 -3
- data/chupa-text-decomposer-pdf.gemspec +3 -3
- data/doc/text/news.md +8 -0
- data/lib/chupa-text/decomposers/pdf.rb +2 -3
- data/test/test-pdf.rb +9 -4
- metadata +31 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '083dd5ec66cd3d8859092f81abcc0d3e5c6f3929'
|
4
|
+
data.tar.gz: 8ec37a765b3402bf97f3f93a5ef5748aa8257499
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3114a7e40b3c25cca8294337108276b1f204224bf8e815201145ab97959eac8387aacbb2c786860732624e2ec282be98793c00b4a9b8e6546eb60cc122cbc0d1
|
7
|
+
data.tar.gz: c68f85e85556afa87c7926c862ab3bb429439048220738b8aeb33798bcb125b9729a175c98ac4f8bf3cb17837734776f6e9a18fb4b86460ff049875183c2e967
|
data/Rakefile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -34,7 +34,9 @@ end
|
|
34
34
|
helper.install
|
35
35
|
spec = helper.gemspec
|
36
36
|
|
37
|
-
Packnga::DocumentTask.new(spec) do
|
37
|
+
Packnga::DocumentTask.new(spec) do |task|
|
38
|
+
task.original_language = "en"
|
39
|
+
task.translate_language = "ja"
|
38
40
|
end
|
39
41
|
|
40
42
|
Packnga::ReleaseTask.new(spec) do
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- mode: ruby; coding: utf-8 -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
# Copyright (C) 2013-2014 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -22,7 +22,7 @@ end
|
|
22
22
|
|
23
23
|
Gem::Specification.new do |spec|
|
24
24
|
spec.name = "chupa-text-decomposer-pdf"
|
25
|
-
spec.version = "1.0.
|
25
|
+
spec.version = "1.0.3"
|
26
26
|
spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-pdf"
|
27
27
|
spec.authors = ["Kouhei Sutou"]
|
28
28
|
spec.email = ["kou@clear-code.com"]
|
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
description = clean_white_space.call(entries[entries.index("Description") + 1])
|
32
32
|
spec.summary = description.split(/\n\n+/, 2).first
|
33
33
|
spec.description = description
|
34
|
-
spec.license = "
|
34
|
+
spec.license = "LGPL-2.1+"
|
35
35
|
spec.files = ["#{spec.name}.gemspec"]
|
36
36
|
spec.files += ["README.md", "LICENSE.txt", "Rakefile", "Gemfile"]
|
37
37
|
spec.files += [".yardopts"]
|
data/doc/text/news.md
CHANGED
@@ -24,7 +24,7 @@ module ChupaText
|
|
24
24
|
registry.register("pdf", self)
|
25
25
|
|
26
26
|
def target?(data)
|
27
|
-
data.extension == "pdf" or
|
27
|
+
(data.extension == "pdf" and data.body.start_with?("%PDF-1")) or
|
28
28
|
data.mime_type == "application/pdf"
|
29
29
|
end
|
30
30
|
|
@@ -37,8 +37,7 @@ module ChupaText
|
|
37
37
|
text << page_text
|
38
38
|
text << "\n" unless page_text.end_with?("\n")
|
39
39
|
end
|
40
|
-
text_data = TextData.new(text)
|
41
|
-
text_data.uri = data.uri
|
40
|
+
text_data = TextData.new(text, :source_data => data)
|
42
41
|
add_attribute(text_data, document, :title)
|
43
42
|
add_attribute(text_data, document, :author)
|
44
43
|
add_attribute(text_data, document, :subject)
|
data/test/test-pdf.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013-
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -37,15 +37,20 @@ class TestPDF < Test::Unit::TestCase
|
|
37
37
|
data = ChupaText::Data.new
|
38
38
|
data.body = ""
|
39
39
|
data.uri = uri
|
40
|
+
data.mime_type = "application/octet-stream"
|
40
41
|
data
|
41
42
|
end
|
42
43
|
|
43
44
|
def test_pdf
|
44
|
-
|
45
|
+
assert do
|
46
|
+
decomposer.target?(create_data("index.pdf"))
|
47
|
+
end
|
45
48
|
end
|
46
49
|
|
47
50
|
def test_html
|
48
|
-
|
51
|
+
assert do
|
52
|
+
not decomposer.target?(create_data("index.html"))
|
53
|
+
end
|
49
54
|
end
|
50
55
|
end
|
51
56
|
|
@@ -105,7 +110,7 @@ class TestPDF < Test::Unit::TestCase
|
|
105
110
|
end
|
106
111
|
|
107
112
|
def test_created_time
|
108
|
-
assert_equal([Time.parse("2014-01-
|
113
|
+
assert_equal([Time.parse("2014-01-05T06:52:45Z")],
|
109
114
|
decompose("created_time"))
|
110
115
|
end
|
111
116
|
|
metadata
CHANGED
@@ -1,111 +1,111 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text-decomposer-pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: chupa-text
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: poppler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: test-unit
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: packnga
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: redcarpet
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
description: |
|
@@ -119,27 +119,27 @@ executables: []
|
|
119
119
|
extensions: []
|
120
120
|
extra_rdoc_files: []
|
121
121
|
files:
|
122
|
-
-
|
123
|
-
-
|
122
|
+
- ".yardopts"
|
123
|
+
- Gemfile
|
124
124
|
- LICENSE.txt
|
125
|
+
- README.md
|
125
126
|
- Rakefile
|
126
|
-
-
|
127
|
-
- .yardopts
|
128
|
-
- lib/chupa-text/decomposers/pdf.rb
|
127
|
+
- chupa-text-decomposer-pdf.gemspec
|
129
128
|
- doc/text/news.md
|
130
|
-
-
|
131
|
-
- test/fixture/
|
132
|
-
- test/fixture/multi-pages.pdf
|
133
|
-
- test/fixture/encrypted.pdf
|
129
|
+
- lib/chupa-text/decomposers/pdf.rb
|
130
|
+
- test/fixture/attributes.odt
|
134
131
|
- test/fixture/attributes.pdf
|
135
132
|
- test/fixture/encrypted.odt
|
136
|
-
- test/fixture/
|
133
|
+
- test/fixture/encrypted.pdf
|
137
134
|
- test/fixture/multi-pages.odt
|
138
|
-
- test/fixture/
|
135
|
+
- test/fixture/multi-pages.pdf
|
136
|
+
- test/fixture/one-page.odt
|
137
|
+
- test/fixture/one-page.pdf
|
139
138
|
- test/run-test.rb
|
139
|
+
- test/test-pdf.rb
|
140
140
|
homepage: https://github.com/ranguba/chupa-text-decomposer-pdf
|
141
141
|
licenses:
|
142
|
-
-
|
142
|
+
- LGPL-2.1+
|
143
143
|
metadata: {}
|
144
144
|
post_install_message:
|
145
145
|
rdoc_options: []
|
@@ -147,20 +147,19 @@ require_paths:
|
|
147
147
|
- lib
|
148
148
|
required_ruby_version: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
|
-
- -
|
150
|
+
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0'
|
153
153
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
154
|
requirements:
|
155
|
-
- -
|
155
|
+
- - ">="
|
156
156
|
- !ruby/object:Gem::Version
|
157
157
|
version: '0'
|
158
158
|
requirements: []
|
159
159
|
rubyforge_project:
|
160
|
-
rubygems_version: 2.
|
160
|
+
rubygems_version: 2.5.2
|
161
161
|
signing_key:
|
162
162
|
specification_version: 4
|
163
163
|
summary: This is a ChupaText decomposer plugin for to extract text and meta-data from
|
164
164
|
PDF.
|
165
165
|
test_files: []
|
166
|
-
has_rdoc:
|