chupa-text 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +3 -1
- data/chupa-text.gemspec +3 -3
- data/data/mime-types.conf +3 -0
- data/doc/text/news.md +15 -0
- data/lib/chupa-text/command/chupa-text-generate-decomposer.rb +5 -5
- data/lib/chupa-text/command/chupa-text.rb +15 -1
- data/lib/chupa-text/data.rb +22 -2
- data/lib/chupa-text/default-logger.rb +5 -1
- data/lib/chupa-text/error.rb +11 -2
- data/lib/chupa-text/extractor.rb +13 -11
- data/lib/chupa-text/formatters.rb +1 -0
- data/lib/chupa-text/formatters/json.rb +7 -6
- data/lib/chupa-text/formatters/text.rb +22 -0
- data/lib/chupa-text/input-data.rb +3 -3
- data/lib/chupa-text/text-data.rb +3 -3
- data/lib/chupa-text/version.rb +1 -1
- data/lib/chupa-text/virtual-file-data.rb +3 -3
- metadata +71 -71
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7bb1a550ae5f986d102ceb5b13c542201a1d630
|
4
|
+
data.tar.gz: 0ca5560f911c3f19ab018f8b0cb37780a2738e25
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0bfc9e586bfbcd10ba60fdfa982fc3027e0e3b2433874fd5870ece5396201e42871de772d6eb10c6e39b32172ea18e21cc84944e48182f70dc72918b949719a8
|
7
|
+
data.tar.gz: 40e10ff0742ae1a5b98733c8a64ea3f10934da02c30b834e8bfba0914271b31bbc04a151ddef19f8f6a11101f4de7590afc7f3d4901caca37e3d7d7c1ce4207a
|
data/Rakefile
CHANGED
data/chupa-text.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -38,7 +38,7 @@ Gem::Specification.new do |spec|
|
|
38
38
|
entries = readme.split(/^\#\#\s(.*)$/)
|
39
39
|
description = clean_white_space.call(entries[entries.index("Description") + 1])
|
40
40
|
spec.summary, spec.description, = description.split(/\n\n+/, 3)
|
41
|
-
spec.license = "
|
41
|
+
spec.license = "LGPL-2.1+"
|
42
42
|
spec.files = ["#{spec.name}.gemspec"]
|
43
43
|
spec.files += ["README.md", "LICENSE.txt", "Rakefile", "Gemfile"]
|
44
44
|
spec.files += [".yardopts"]
|
data/data/mime-types.conf
CHANGED
@@ -34,3 +34,6 @@ mime_types["xlsx"] =
|
|
34
34
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
35
35
|
mime_types["pptx"] =
|
36
36
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
37
|
+
|
38
|
+
mime_types["eml"] = "message/rfc822"
|
39
|
+
mime_types["mew"] = "message/rfc822"
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 1.0.5: 2017-05-02
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added `message/rfc822` MIME type association with `.eml` and
|
8
|
+
`.mew` into the default MIME type list.
|
9
|
+
|
10
|
+
* Searched decomposer even if MIME type is `text/plain`.
|
11
|
+
|
12
|
+
* `ChupaText::Data#initialize`: Accepted source data.
|
13
|
+
|
14
|
+
* `ChupaText::UnknownEncodingError`: Added.
|
15
|
+
|
16
|
+
* Added plain text formatter.
|
17
|
+
|
3
18
|
## 1.0.4: 2014-02-17
|
4
19
|
|
5
20
|
* Removed a needless optimization.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -76,7 +76,7 @@ module ChupaText
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def lgplv2_1_or_later_license
|
79
|
-
"
|
79
|
+
"LGPL-2.1+"
|
80
80
|
end
|
81
81
|
|
82
82
|
def create_option_parser
|
@@ -154,7 +154,7 @@ module ChupaText
|
|
154
154
|
def generate_gemspec
|
155
155
|
create_file("#{gem_name}.gemspec") do |file|
|
156
156
|
file.puts(<<-GEMSPEC)
|
157
|
-
# -*-
|
157
|
+
# -*- ruby -*-
|
158
158
|
|
159
159
|
Gem::Specification.new do |spec|
|
160
160
|
spec.name = "#{gem_name}"
|
@@ -183,7 +183,7 @@ end
|
|
183
183
|
def generate_gemfile
|
184
184
|
create_file("Gemfile") do |file|
|
185
185
|
file.puts(<<-Gemfile)
|
186
|
-
# -*-
|
186
|
+
# -*- ruby -*-
|
187
187
|
|
188
188
|
source "https://rubygems.org/"
|
189
189
|
|
@@ -195,7 +195,7 @@ gemspec
|
|
195
195
|
def generate_rakefile
|
196
196
|
create_file("Rakefile") do |file|
|
197
197
|
file.puts(<<-RAKEFILE)
|
198
|
-
# -*-
|
198
|
+
# -*- ruby -*-
|
199
199
|
|
200
200
|
require "bundler/gem_tasks"
|
201
201
|
|
@@ -26,10 +26,13 @@ module ChupaText
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
+
AVAILABLE_FORMATS = [:json, :text]
|
30
|
+
|
29
31
|
def initialize
|
30
32
|
@input = nil
|
31
33
|
@configuration = Configuration.default
|
32
34
|
@enable_gems = true
|
35
|
+
@format = :json
|
33
36
|
end
|
34
37
|
|
35
38
|
def run(*arguments)
|
@@ -89,6 +92,12 @@ module ChupaText
|
|
89
92
|
"Appends PATH to decomposer load path.") do |path|
|
90
93
|
$LOAD_PATH << path
|
91
94
|
end
|
95
|
+
parser.on("--format=FORMAT", AVAILABLE_FORMATS,
|
96
|
+
"Output FORMAT.",
|
97
|
+
"[#{AVAILABLE_FORMATS.join(', ')}]",
|
98
|
+
"(default: json)") do |format|
|
99
|
+
@format = format
|
100
|
+
end
|
92
101
|
|
93
102
|
parser.separator("")
|
94
103
|
parser.separator("Log related options:")
|
@@ -150,7 +159,12 @@ module ChupaText
|
|
150
159
|
end
|
151
160
|
|
152
161
|
def create_formatter
|
153
|
-
|
162
|
+
case @format
|
163
|
+
when :json
|
164
|
+
Formatters::JSON.new($stdout)
|
165
|
+
when :text
|
166
|
+
Formatters::Text.new($stdout)
|
167
|
+
end
|
154
168
|
end
|
155
169
|
end
|
156
170
|
end
|
data/lib/chupa-text/data.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -51,7 +51,7 @@ module ChupaText
|
|
51
51
|
# archive data in {#source}.
|
52
52
|
attr_accessor :source
|
53
53
|
|
54
|
-
def initialize
|
54
|
+
def initialize(options={})
|
55
55
|
@uri = nil
|
56
56
|
@body = nil
|
57
57
|
@size = nil
|
@@ -59,6 +59,9 @@ module ChupaText
|
|
59
59
|
@mime_type = nil
|
60
60
|
@attributes = Attributes.new
|
61
61
|
@source = nil
|
62
|
+
@options = options || {}
|
63
|
+
source_data = @options[:source_data]
|
64
|
+
merge!(source_data) if source_data
|
62
65
|
end
|
63
66
|
|
64
67
|
def initialize_copy(object)
|
@@ -67,6 +70,23 @@ module ChupaText
|
|
67
70
|
self
|
68
71
|
end
|
69
72
|
|
73
|
+
# Merges metadata from data.
|
74
|
+
#
|
75
|
+
# @param [Data] data The data to be merged.
|
76
|
+
#
|
77
|
+
# @return [void]
|
78
|
+
def merge!(data)
|
79
|
+
self.uri = data.uri
|
80
|
+
self.path = data.path
|
81
|
+
data.attributes.each do |name, value|
|
82
|
+
self[name] = value
|
83
|
+
end
|
84
|
+
if data.mime_type
|
85
|
+
self["source-mime-types"] ||= []
|
86
|
+
self["source-mime-types"].unshift(data.mime_type)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
70
90
|
# @param [String, URI, nil] uri The URI for the data. If `uri` is
|
71
91
|
# `nil`, it means that the data isn't associated with any URIs.
|
72
92
|
def uri=(uri)
|
@@ -137,7 +137,11 @@ module ChupaText
|
|
137
137
|
def format_message(message)
|
138
138
|
case message
|
139
139
|
when String
|
140
|
-
message
|
140
|
+
if message.end_with?("\n")
|
141
|
+
message
|
142
|
+
else
|
143
|
+
"#{message}\n"
|
144
|
+
end
|
141
145
|
when Exception
|
142
146
|
"#{message.message}(#{message.class})\n" +
|
143
147
|
(message.backtrace || []).join("\n")
|
data/lib/chupa-text/error.rb
CHANGED
@@ -22,7 +22,7 @@ module ChupaText
|
|
22
22
|
attr_reader :data
|
23
23
|
def initialize(data)
|
24
24
|
@data = data
|
25
|
-
super("Encrypted data: <#{data.
|
25
|
+
super("Encrypted data: <#{data.uri}>(#{data.mime_type})")
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
@@ -31,7 +31,16 @@ module ChupaText
|
|
31
31
|
def initialize(data, detail)
|
32
32
|
@data = data
|
33
33
|
@detail = detail
|
34
|
-
super("Invalid data: <#{data.
|
34
|
+
super("Invalid data: <#{data.uri}>(#{data.mime_type}): <#{detail}>")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class UnknownEncodingError < Error
|
39
|
+
attr_reader :data, :encoding
|
40
|
+
def initialize(data, encoding)
|
41
|
+
@data = data
|
42
|
+
@encoding = encoding
|
43
|
+
super("Unknown encoding data: <#{data.uri}>(#{data.mime_type}): <#{encoding}>")
|
35
44
|
end
|
36
45
|
end
|
37
46
|
end
|
data/lib/chupa-text/extractor.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -60,26 +60,28 @@ module ChupaText
|
|
60
60
|
def extract(input)
|
61
61
|
targets = [ensure_data(input)]
|
62
62
|
until targets.empty?
|
63
|
-
target = targets.
|
63
|
+
target = targets.shift
|
64
64
|
debug do
|
65
|
-
"#{log_tag}[extract][target] <#{target.
|
66
|
-
end
|
67
|
-
if target.text_plain?
|
68
|
-
yield(target)
|
69
|
-
next
|
65
|
+
"#{log_tag}[extract][target] <#{target.uri}>:<#{target.mime_type}>"
|
70
66
|
end
|
71
67
|
decomposer = find_decomposer(target)
|
72
68
|
if decomposer.nil?
|
73
|
-
|
74
|
-
|
75
|
-
|
69
|
+
if target.text_plain?
|
70
|
+
debug {"#{log_tag}[extract][text-plain]"}
|
71
|
+
yield(target)
|
72
|
+
next
|
73
|
+
else
|
74
|
+
debug {"#{log_tag}[extract][decomposer] not found"}
|
75
|
+
yield(target) if target.text?
|
76
|
+
next
|
77
|
+
end
|
76
78
|
end
|
77
79
|
debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"}
|
78
80
|
decomposer.decompose(target) do |decomposed|
|
79
81
|
debug do
|
80
82
|
"#{log_tag}[extract][decomposed] " +
|
81
83
|
"#{decomposer.class}: " +
|
82
|
-
"<#{target.
|
84
|
+
"<#{target.uri}>:<#{target.mime_type}> -> " +
|
83
85
|
"<#{decomposed.mime_type}>"
|
84
86
|
end
|
85
87
|
targets.push(decomposed)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -21,23 +21,24 @@ module ChupaText
|
|
21
21
|
class JSON
|
22
22
|
def initialize(output)
|
23
23
|
@output = output
|
24
|
-
@
|
24
|
+
@texts = []
|
25
25
|
end
|
26
26
|
|
27
27
|
def format_start(data)
|
28
|
-
format_headers(data, @formatted)
|
29
|
-
@formatted["texts"] = []
|
30
28
|
end
|
31
29
|
|
32
30
|
def format_extracted(data)
|
33
31
|
text = {}
|
34
32
|
format_headers(data, text)
|
35
33
|
text["body"] = data.body
|
36
|
-
@
|
34
|
+
@texts << text
|
37
35
|
end
|
38
36
|
|
39
37
|
def format_finish(data)
|
40
|
-
|
38
|
+
formatted = {}
|
39
|
+
format_headers(data, formatted)
|
40
|
+
formatted["texts"] = @texts
|
41
|
+
@output << ::JSON.pretty_generate(formatted)
|
41
42
|
@output << "\n"
|
42
43
|
end
|
43
44
|
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ChupaText
|
2
|
+
module Formatters
|
3
|
+
class Text
|
4
|
+
def initialize(output)
|
5
|
+
@output = output
|
6
|
+
@texts = []
|
7
|
+
end
|
8
|
+
|
9
|
+
def format_start(data)
|
10
|
+
end
|
11
|
+
|
12
|
+
def format_extracted(data)
|
13
|
+
@texts << data.body
|
14
|
+
end
|
15
|
+
|
16
|
+
def format_finish(data)
|
17
|
+
@output << @texts.join("\n\x0c\n")
|
18
|
+
@output << "\n"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -19,8 +19,8 @@ require "open-uri"
|
|
19
19
|
|
20
20
|
module ChupaText
|
21
21
|
class InputData < Data
|
22
|
-
def initialize(uri)
|
23
|
-
super()
|
22
|
+
def initialize(uri, options={})
|
23
|
+
super(options)
|
24
24
|
self.uri = uri
|
25
25
|
if @uri.class == URI::Generic
|
26
26
|
@content = FileContent.new(@uri.path)
|
data/lib/chupa-text/text-data.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -16,8 +16,8 @@
|
|
16
16
|
|
17
17
|
module ChupaText
|
18
18
|
class TextData < Data
|
19
|
-
def initialize(text)
|
20
|
-
super()
|
19
|
+
def initialize(text, options={})
|
20
|
+
super(options)
|
21
21
|
self.mime_type = "text/plain"
|
22
22
|
self.body = text
|
23
23
|
self.size = text.bytesize
|
data/lib/chupa-text/version.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -16,8 +16,8 @@
|
|
16
16
|
|
17
17
|
module ChupaText
|
18
18
|
class VirtualFileData < Data
|
19
|
-
def initialize(uri, input)
|
20
|
-
super()
|
19
|
+
def initialize(uri, input, options={})
|
20
|
+
super(options)
|
21
21
|
self.uri = uri
|
22
22
|
if @uri
|
23
23
|
path = @uri.path
|
metadata
CHANGED
@@ -1,83 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: test-unit
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: packnga
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: redcarpet
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
description: ''
|
@@ -89,85 +89,86 @@ executables:
|
|
89
89
|
extensions: []
|
90
90
|
extra_rdoc_files: []
|
91
91
|
files:
|
92
|
-
-
|
93
|
-
-
|
92
|
+
- ".yardopts"
|
93
|
+
- Gemfile
|
94
94
|
- LICENSE.txt
|
95
|
+
- README.md
|
95
96
|
- Rakefile
|
96
|
-
-
|
97
|
-
-
|
97
|
+
- bin/chupa-text
|
98
|
+
- bin/chupa-text-generate-decomposer
|
99
|
+
- chupa-text.gemspec
|
98
100
|
- data/chupa-text.conf
|
99
101
|
- data/mime-types.conf
|
100
|
-
-
|
101
|
-
-
|
102
|
-
-
|
103
|
-
-
|
104
|
-
- lib/chupa-text
|
102
|
+
- doc/text/command-line.md
|
103
|
+
- doc/text/decomposer.md
|
104
|
+
- doc/text/library.md
|
105
|
+
- doc/text/news.md
|
106
|
+
- lib/chupa-text.rb
|
107
|
+
- lib/chupa-text/attributes.rb
|
108
|
+
- lib/chupa-text/command.rb
|
105
109
|
- lib/chupa-text/command/chupa-text-generate-decomposer.rb
|
110
|
+
- lib/chupa-text/command/chupa-text.rb
|
111
|
+
- lib/chupa-text/configuration-loader.rb
|
106
112
|
- lib/chupa-text/configuration.rb
|
113
|
+
- lib/chupa-text/data.rb
|
107
114
|
- lib/chupa-text/decomposer-registry.rb
|
108
|
-
- lib/chupa-text/
|
109
|
-
- lib/chupa-text/
|
110
|
-
- lib/chupa-text/virtual-file-data.rb
|
111
|
-
- lib/chupa-text/size-parser.rb
|
112
|
-
- lib/chupa-text/formatters/json.rb
|
113
|
-
- lib/chupa-text/virtual-content.rb
|
115
|
+
- lib/chupa-text/decomposer.rb
|
116
|
+
- lib/chupa-text/decomposers.rb
|
114
117
|
- lib/chupa-text/decomposers/csv.rb
|
118
|
+
- lib/chupa-text/decomposers/gzip.rb
|
115
119
|
- lib/chupa-text/decomposers/tar.rb
|
116
120
|
- lib/chupa-text/decomposers/xml.rb
|
117
|
-
- lib/chupa-text/
|
121
|
+
- lib/chupa-text/default-logger.rb
|
118
122
|
- lib/chupa-text/error.rb
|
119
|
-
- lib/chupa-text/formatters.rb
|
120
|
-
- lib/chupa-text/decomposers.rb
|
121
|
-
- lib/chupa-text/command.rb
|
122
|
-
- lib/chupa-text/file-content.rb
|
123
|
-
- lib/chupa-text/decomposer.rb
|
124
123
|
- lib/chupa-text/external-command.rb
|
125
|
-
- lib/chupa-text/
|
126
|
-
- lib/chupa-text/
|
124
|
+
- lib/chupa-text/extractor.rb
|
125
|
+
- lib/chupa-text/file-content.rb
|
126
|
+
- lib/chupa-text/formatters.rb
|
127
|
+
- lib/chupa-text/formatters/json.rb
|
128
|
+
- lib/chupa-text/formatters/text.rb
|
127
129
|
- lib/chupa-text/input-data.rb
|
130
|
+
- lib/chupa-text/loggable.rb
|
131
|
+
- lib/chupa-text/logger.rb
|
128
132
|
- lib/chupa-text/mime-type-registry.rb
|
129
|
-
- lib/chupa-text/
|
130
|
-
- lib/chupa-text/
|
131
|
-
- lib/chupa-text.rb
|
132
|
-
-
|
133
|
-
-
|
134
|
-
-
|
135
|
-
- doc/text/library.md
|
136
|
-
- test/test-decomposers.rb
|
137
|
-
- test/test-default-logger.rb
|
138
|
-
- test/test-decomposer.rb
|
139
|
-
- test/test-virtual-content.rb
|
133
|
+
- lib/chupa-text/mime-type.rb
|
134
|
+
- lib/chupa-text/size-parser.rb
|
135
|
+
- lib/chupa-text/text-data.rb
|
136
|
+
- lib/chupa-text/version.rb
|
137
|
+
- lib/chupa-text/virtual-content.rb
|
138
|
+
- lib/chupa-text/virtual-file-data.rb
|
140
139
|
- test/command/test-chupa-text.rb
|
141
|
-
- test/helper.rb
|
142
|
-
- test/test-configuration-loader.rb
|
143
|
-
- test/test-mime-type-registry.rb
|
144
|
-
- test/test-decomposer-registry.rb
|
145
|
-
- test/fixture/gzip/hello.txt.gz
|
146
|
-
- test/fixture/gzip/hello.tgz
|
147
|
-
- test/fixture/gzip/hello.tar.gz
|
148
|
-
- test/fixture/tar/top-level.tar
|
149
|
-
- test/fixture/tar/directory.tar
|
150
|
-
- test/fixture/command/chupa-text/hello.txt.gz
|
151
|
-
- test/fixture/command/chupa-text/no-decomposer.conf
|
152
|
-
- test/fixture/command/chupa-text/hello.txt
|
153
|
-
- test/fixture/extractor/hello.txt
|
154
|
-
- test/test-attributes.rb
|
155
|
-
- test/test-external-command.rb
|
156
|
-
- test/test-size-parser.rb
|
157
140
|
- test/decomposers/test-csv.rb
|
158
141
|
- test/decomposers/test-gzip.rb
|
159
142
|
- test/decomposers/test-tar.rb
|
160
143
|
- test/decomposers/test-xml.rb
|
144
|
+
- test/fixture/command/chupa-text/hello.txt
|
145
|
+
- test/fixture/command/chupa-text/hello.txt.gz
|
146
|
+
- test/fixture/command/chupa-text/no-decomposer.conf
|
147
|
+
- test/fixture/extractor/hello.txt
|
148
|
+
- test/fixture/gzip/hello.tar.gz
|
149
|
+
- test/fixture/gzip/hello.tgz
|
150
|
+
- test/fixture/gzip/hello.txt.gz
|
151
|
+
- test/fixture/tar/directory.tar
|
152
|
+
- test/fixture/tar/top-level.tar
|
153
|
+
- test/helper.rb
|
154
|
+
- test/run-test.rb
|
155
|
+
- test/test-attributes.rb
|
156
|
+
- test/test-configuration-loader.rb
|
161
157
|
- test/test-data.rb
|
162
|
-
- test/test-
|
158
|
+
- test/test-decomposer-registry.rb
|
159
|
+
- test/test-decomposer.rb
|
160
|
+
- test/test-decomposers.rb
|
161
|
+
- test/test-default-logger.rb
|
162
|
+
- test/test-external-command.rb
|
163
163
|
- test/test-extractor.rb
|
164
|
-
- test/
|
164
|
+
- test/test-file-content.rb
|
165
|
+
- test/test-mime-type-registry.rb
|
166
|
+
- test/test-size-parser.rb
|
165
167
|
- test/test-text-data.rb
|
166
|
-
-
|
167
|
-
- bin/chupa-text-generate-decomposer
|
168
|
+
- test/test-virtual-content.rb
|
168
169
|
homepage: http://ranguba.org/#about-chupa-text
|
169
170
|
licenses:
|
170
|
-
-
|
171
|
+
- LGPL-2.1+
|
171
172
|
metadata: {}
|
172
173
|
post_install_message:
|
173
174
|
rdoc_options: []
|
@@ -175,20 +176,19 @@ require_paths:
|
|
175
176
|
- lib
|
176
177
|
required_ruby_version: !ruby/object:Gem::Requirement
|
177
178
|
requirements:
|
178
|
-
- -
|
179
|
+
- - ">="
|
179
180
|
- !ruby/object:Gem::Version
|
180
181
|
version: '0'
|
181
182
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
182
183
|
requirements:
|
183
|
-
- -
|
184
|
+
- - ">="
|
184
185
|
- !ruby/object:Gem::Version
|
185
186
|
version: '0'
|
186
187
|
requirements: []
|
187
188
|
rubyforge_project:
|
188
|
-
rubygems_version: 2.
|
189
|
+
rubygems_version: 2.5.2
|
189
190
|
signing_key:
|
190
191
|
specification_version: 4
|
191
192
|
summary: ChupaText is an extensible text extractor. You can plug your custom text
|
192
193
|
extractor in ChupaText. You can write your plugin by Ruby.
|
193
194
|
test_files: []
|
194
|
-
has_rdoc:
|