chupa-text 1.0.4 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +3 -1
- data/chupa-text.gemspec +3 -3
- data/data/mime-types.conf +3 -0
- data/doc/text/news.md +15 -0
- data/lib/chupa-text/command/chupa-text-generate-decomposer.rb +5 -5
- data/lib/chupa-text/command/chupa-text.rb +15 -1
- data/lib/chupa-text/data.rb +22 -2
- data/lib/chupa-text/default-logger.rb +5 -1
- data/lib/chupa-text/error.rb +11 -2
- data/lib/chupa-text/extractor.rb +13 -11
- data/lib/chupa-text/formatters.rb +1 -0
- data/lib/chupa-text/formatters/json.rb +7 -6
- data/lib/chupa-text/formatters/text.rb +22 -0
- data/lib/chupa-text/input-data.rb +3 -3
- data/lib/chupa-text/text-data.rb +3 -3
- data/lib/chupa-text/version.rb +1 -1
- data/lib/chupa-text/virtual-file-data.rb +3 -3
- metadata +71 -71
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7bb1a550ae5f986d102ceb5b13c542201a1d630
|
4
|
+
data.tar.gz: 0ca5560f911c3f19ab018f8b0cb37780a2738e25
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0bfc9e586bfbcd10ba60fdfa982fc3027e0e3b2433874fd5870ece5396201e42871de772d6eb10c6e39b32172ea18e21cc84944e48182f70dc72918b949719a8
|
7
|
+
data.tar.gz: 40e10ff0742ae1a5b98733c8a64ea3f10934da02c30b834e8bfba0914271b31bbc04a151ddef19f8f6a11101f4de7590afc7f3d4901caca37e3d7d7c1ce4207a
|
data/Rakefile
CHANGED
data/chupa-text.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -38,7 +38,7 @@ Gem::Specification.new do |spec|
|
|
38
38
|
entries = readme.split(/^\#\#\s(.*)$/)
|
39
39
|
description = clean_white_space.call(entries[entries.index("Description") + 1])
|
40
40
|
spec.summary, spec.description, = description.split(/\n\n+/, 3)
|
41
|
-
spec.license = "
|
41
|
+
spec.license = "LGPL-2.1+"
|
42
42
|
spec.files = ["#{spec.name}.gemspec"]
|
43
43
|
spec.files += ["README.md", "LICENSE.txt", "Rakefile", "Gemfile"]
|
44
44
|
spec.files += [".yardopts"]
|
data/data/mime-types.conf
CHANGED
@@ -34,3 +34,6 @@ mime_types["xlsx"] =
|
|
34
34
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
35
35
|
mime_types["pptx"] =
|
36
36
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
37
|
+
|
38
|
+
mime_types["eml"] = "message/rfc822"
|
39
|
+
mime_types["mew"] = "message/rfc822"
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 1.0.5: 2017-05-02
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added `message/rfc822` MIME type association with `.eml` and
|
8
|
+
`.mew` into the default MIME type list.
|
9
|
+
|
10
|
+
* Searched decomposer even if MIME type is `text/plain`.
|
11
|
+
|
12
|
+
* `ChupaText::Data#initialize`: Accepted source data.
|
13
|
+
|
14
|
+
* `ChupaText::UnknownEncodingError`: Added.
|
15
|
+
|
16
|
+
* Added plain text formatter.
|
17
|
+
|
3
18
|
## 1.0.4: 2014-02-17
|
4
19
|
|
5
20
|
* Removed a needless optimization.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -76,7 +76,7 @@ module ChupaText
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def lgplv2_1_or_later_license
|
79
|
-
"
|
79
|
+
"LGPL-2.1+"
|
80
80
|
end
|
81
81
|
|
82
82
|
def create_option_parser
|
@@ -154,7 +154,7 @@ module ChupaText
|
|
154
154
|
def generate_gemspec
|
155
155
|
create_file("#{gem_name}.gemspec") do |file|
|
156
156
|
file.puts(<<-GEMSPEC)
|
157
|
-
# -*-
|
157
|
+
# -*- ruby -*-
|
158
158
|
|
159
159
|
Gem::Specification.new do |spec|
|
160
160
|
spec.name = "#{gem_name}"
|
@@ -183,7 +183,7 @@ end
|
|
183
183
|
def generate_gemfile
|
184
184
|
create_file("Gemfile") do |file|
|
185
185
|
file.puts(<<-Gemfile)
|
186
|
-
# -*-
|
186
|
+
# -*- ruby -*-
|
187
187
|
|
188
188
|
source "https://rubygems.org/"
|
189
189
|
|
@@ -195,7 +195,7 @@ gemspec
|
|
195
195
|
def generate_rakefile
|
196
196
|
create_file("Rakefile") do |file|
|
197
197
|
file.puts(<<-RAKEFILE)
|
198
|
-
# -*-
|
198
|
+
# -*- ruby -*-
|
199
199
|
|
200
200
|
require "bundler/gem_tasks"
|
201
201
|
|
@@ -26,10 +26,13 @@ module ChupaText
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
+
AVAILABLE_FORMATS = [:json, :text]
|
30
|
+
|
29
31
|
def initialize
|
30
32
|
@input = nil
|
31
33
|
@configuration = Configuration.default
|
32
34
|
@enable_gems = true
|
35
|
+
@format = :json
|
33
36
|
end
|
34
37
|
|
35
38
|
def run(*arguments)
|
@@ -89,6 +92,12 @@ module ChupaText
|
|
89
92
|
"Appends PATH to decomposer load path.") do |path|
|
90
93
|
$LOAD_PATH << path
|
91
94
|
end
|
95
|
+
parser.on("--format=FORMAT", AVAILABLE_FORMATS,
|
96
|
+
"Output FORMAT.",
|
97
|
+
"[#{AVAILABLE_FORMATS.join(', ')}]",
|
98
|
+
"(default: json)") do |format|
|
99
|
+
@format = format
|
100
|
+
end
|
92
101
|
|
93
102
|
parser.separator("")
|
94
103
|
parser.separator("Log related options:")
|
@@ -150,7 +159,12 @@ module ChupaText
|
|
150
159
|
end
|
151
160
|
|
152
161
|
def create_formatter
|
153
|
-
|
162
|
+
case @format
|
163
|
+
when :json
|
164
|
+
Formatters::JSON.new($stdout)
|
165
|
+
when :text
|
166
|
+
Formatters::Text.new($stdout)
|
167
|
+
end
|
154
168
|
end
|
155
169
|
end
|
156
170
|
end
|
data/lib/chupa-text/data.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -51,7 +51,7 @@ module ChupaText
|
|
51
51
|
# archive data in {#source}.
|
52
52
|
attr_accessor :source
|
53
53
|
|
54
|
-
def initialize
|
54
|
+
def initialize(options={})
|
55
55
|
@uri = nil
|
56
56
|
@body = nil
|
57
57
|
@size = nil
|
@@ -59,6 +59,9 @@ module ChupaText
|
|
59
59
|
@mime_type = nil
|
60
60
|
@attributes = Attributes.new
|
61
61
|
@source = nil
|
62
|
+
@options = options || {}
|
63
|
+
source_data = @options[:source_data]
|
64
|
+
merge!(source_data) if source_data
|
62
65
|
end
|
63
66
|
|
64
67
|
def initialize_copy(object)
|
@@ -67,6 +70,23 @@ module ChupaText
|
|
67
70
|
self
|
68
71
|
end
|
69
72
|
|
73
|
+
# Merges metadata from data.
|
74
|
+
#
|
75
|
+
# @param [Data] data The data to be merged.
|
76
|
+
#
|
77
|
+
# @return [void]
|
78
|
+
def merge!(data)
|
79
|
+
self.uri = data.uri
|
80
|
+
self.path = data.path
|
81
|
+
data.attributes.each do |name, value|
|
82
|
+
self[name] = value
|
83
|
+
end
|
84
|
+
if data.mime_type
|
85
|
+
self["source-mime-types"] ||= []
|
86
|
+
self["source-mime-types"].unshift(data.mime_type)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
70
90
|
# @param [String, URI, nil] uri The URI for the data. If `uri` is
|
71
91
|
# `nil`, it means that the data isn't associated with any URIs.
|
72
92
|
def uri=(uri)
|
@@ -137,7 +137,11 @@ module ChupaText
|
|
137
137
|
def format_message(message)
|
138
138
|
case message
|
139
139
|
when String
|
140
|
-
message
|
140
|
+
if message.end_with?("\n")
|
141
|
+
message
|
142
|
+
else
|
143
|
+
"#{message}\n"
|
144
|
+
end
|
141
145
|
when Exception
|
142
146
|
"#{message.message}(#{message.class})\n" +
|
143
147
|
(message.backtrace || []).join("\n")
|
data/lib/chupa-text/error.rb
CHANGED
@@ -22,7 +22,7 @@ module ChupaText
|
|
22
22
|
attr_reader :data
|
23
23
|
def initialize(data)
|
24
24
|
@data = data
|
25
|
-
super("Encrypted data: <#{data.
|
25
|
+
super("Encrypted data: <#{data.uri}>(#{data.mime_type})")
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
@@ -31,7 +31,16 @@ module ChupaText
|
|
31
31
|
def initialize(data, detail)
|
32
32
|
@data = data
|
33
33
|
@detail = detail
|
34
|
-
super("Invalid data: <#{data.
|
34
|
+
super("Invalid data: <#{data.uri}>(#{data.mime_type}): <#{detail}>")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class UnknownEncodingError < Error
|
39
|
+
attr_reader :data, :encoding
|
40
|
+
def initialize(data, encoding)
|
41
|
+
@data = data
|
42
|
+
@encoding = encoding
|
43
|
+
super("Unknown encoding data: <#{data.uri}>(#{data.mime_type}): <#{encoding}>")
|
35
44
|
end
|
36
45
|
end
|
37
46
|
end
|
data/lib/chupa-text/extractor.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -60,26 +60,28 @@ module ChupaText
|
|
60
60
|
def extract(input)
|
61
61
|
targets = [ensure_data(input)]
|
62
62
|
until targets.empty?
|
63
|
-
target = targets.
|
63
|
+
target = targets.shift
|
64
64
|
debug do
|
65
|
-
"#{log_tag}[extract][target] <#{target.
|
66
|
-
end
|
67
|
-
if target.text_plain?
|
68
|
-
yield(target)
|
69
|
-
next
|
65
|
+
"#{log_tag}[extract][target] <#{target.uri}>:<#{target.mime_type}>"
|
70
66
|
end
|
71
67
|
decomposer = find_decomposer(target)
|
72
68
|
if decomposer.nil?
|
73
|
-
|
74
|
-
|
75
|
-
|
69
|
+
if target.text_plain?
|
70
|
+
debug {"#{log_tag}[extract][text-plain]"}
|
71
|
+
yield(target)
|
72
|
+
next
|
73
|
+
else
|
74
|
+
debug {"#{log_tag}[extract][decomposer] not found"}
|
75
|
+
yield(target) if target.text?
|
76
|
+
next
|
77
|
+
end
|
76
78
|
end
|
77
79
|
debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"}
|
78
80
|
decomposer.decompose(target) do |decomposed|
|
79
81
|
debug do
|
80
82
|
"#{log_tag}[extract][decomposed] " +
|
81
83
|
"#{decomposer.class}: " +
|
82
|
-
"<#{target.
|
84
|
+
"<#{target.uri}>:<#{target.mime_type}> -> " +
|
83
85
|
"<#{decomposed.mime_type}>"
|
84
86
|
end
|
85
87
|
targets.push(decomposed)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -21,23 +21,24 @@ module ChupaText
|
|
21
21
|
class JSON
|
22
22
|
def initialize(output)
|
23
23
|
@output = output
|
24
|
-
@
|
24
|
+
@texts = []
|
25
25
|
end
|
26
26
|
|
27
27
|
def format_start(data)
|
28
|
-
format_headers(data, @formatted)
|
29
|
-
@formatted["texts"] = []
|
30
28
|
end
|
31
29
|
|
32
30
|
def format_extracted(data)
|
33
31
|
text = {}
|
34
32
|
format_headers(data, text)
|
35
33
|
text["body"] = data.body
|
36
|
-
@
|
34
|
+
@texts << text
|
37
35
|
end
|
38
36
|
|
39
37
|
def format_finish(data)
|
40
|
-
|
38
|
+
formatted = {}
|
39
|
+
format_headers(data, formatted)
|
40
|
+
formatted["texts"] = @texts
|
41
|
+
@output << ::JSON.pretty_generate(formatted)
|
41
42
|
@output << "\n"
|
42
43
|
end
|
43
44
|
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ChupaText
|
2
|
+
module Formatters
|
3
|
+
class Text
|
4
|
+
def initialize(output)
|
5
|
+
@output = output
|
6
|
+
@texts = []
|
7
|
+
end
|
8
|
+
|
9
|
+
def format_start(data)
|
10
|
+
end
|
11
|
+
|
12
|
+
def format_extracted(data)
|
13
|
+
@texts << data.body
|
14
|
+
end
|
15
|
+
|
16
|
+
def format_finish(data)
|
17
|
+
@output << @texts.join("\n\x0c\n")
|
18
|
+
@output << "\n"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -19,8 +19,8 @@ require "open-uri"
|
|
19
19
|
|
20
20
|
module ChupaText
|
21
21
|
class InputData < Data
|
22
|
-
def initialize(uri)
|
23
|
-
super()
|
22
|
+
def initialize(uri, options={})
|
23
|
+
super(options)
|
24
24
|
self.uri = uri
|
25
25
|
if @uri.class == URI::Generic
|
26
26
|
@content = FileContent.new(@uri.path)
|
data/lib/chupa-text/text-data.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -16,8 +16,8 @@
|
|
16
16
|
|
17
17
|
module ChupaText
|
18
18
|
class TextData < Data
|
19
|
-
def initialize(text)
|
20
|
-
super()
|
19
|
+
def initialize(text, options={})
|
20
|
+
super(options)
|
21
21
|
self.mime_type = "text/plain"
|
22
22
|
self.body = text
|
23
23
|
self.size = text.bytesize
|
data/lib/chupa-text/version.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -16,8 +16,8 @@
|
|
16
16
|
|
17
17
|
module ChupaText
|
18
18
|
class VirtualFileData < Data
|
19
|
-
def initialize(uri, input)
|
20
|
-
super()
|
19
|
+
def initialize(uri, input, options={})
|
20
|
+
super(options)
|
21
21
|
self.uri = uri
|
22
22
|
if @uri
|
23
23
|
path = @uri.path
|
metadata
CHANGED
@@ -1,83 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: test-unit
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: packnga
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: redcarpet
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
description: ''
|
@@ -89,85 +89,86 @@ executables:
|
|
89
89
|
extensions: []
|
90
90
|
extra_rdoc_files: []
|
91
91
|
files:
|
92
|
-
-
|
93
|
-
-
|
92
|
+
- ".yardopts"
|
93
|
+
- Gemfile
|
94
94
|
- LICENSE.txt
|
95
|
+
- README.md
|
95
96
|
- Rakefile
|
96
|
-
-
|
97
|
-
-
|
97
|
+
- bin/chupa-text
|
98
|
+
- bin/chupa-text-generate-decomposer
|
99
|
+
- chupa-text.gemspec
|
98
100
|
- data/chupa-text.conf
|
99
101
|
- data/mime-types.conf
|
100
|
-
-
|
101
|
-
-
|
102
|
-
-
|
103
|
-
-
|
104
|
-
- lib/chupa-text
|
102
|
+
- doc/text/command-line.md
|
103
|
+
- doc/text/decomposer.md
|
104
|
+
- doc/text/library.md
|
105
|
+
- doc/text/news.md
|
106
|
+
- lib/chupa-text.rb
|
107
|
+
- lib/chupa-text/attributes.rb
|
108
|
+
- lib/chupa-text/command.rb
|
105
109
|
- lib/chupa-text/command/chupa-text-generate-decomposer.rb
|
110
|
+
- lib/chupa-text/command/chupa-text.rb
|
111
|
+
- lib/chupa-text/configuration-loader.rb
|
106
112
|
- lib/chupa-text/configuration.rb
|
113
|
+
- lib/chupa-text/data.rb
|
107
114
|
- lib/chupa-text/decomposer-registry.rb
|
108
|
-
- lib/chupa-text/
|
109
|
-
- lib/chupa-text/
|
110
|
-
- lib/chupa-text/virtual-file-data.rb
|
111
|
-
- lib/chupa-text/size-parser.rb
|
112
|
-
- lib/chupa-text/formatters/json.rb
|
113
|
-
- lib/chupa-text/virtual-content.rb
|
115
|
+
- lib/chupa-text/decomposer.rb
|
116
|
+
- lib/chupa-text/decomposers.rb
|
114
117
|
- lib/chupa-text/decomposers/csv.rb
|
118
|
+
- lib/chupa-text/decomposers/gzip.rb
|
115
119
|
- lib/chupa-text/decomposers/tar.rb
|
116
120
|
- lib/chupa-text/decomposers/xml.rb
|
117
|
-
- lib/chupa-text/
|
121
|
+
- lib/chupa-text/default-logger.rb
|
118
122
|
- lib/chupa-text/error.rb
|
119
|
-
- lib/chupa-text/formatters.rb
|
120
|
-
- lib/chupa-text/decomposers.rb
|
121
|
-
- lib/chupa-text/command.rb
|
122
|
-
- lib/chupa-text/file-content.rb
|
123
|
-
- lib/chupa-text/decomposer.rb
|
124
123
|
- lib/chupa-text/external-command.rb
|
125
|
-
- lib/chupa-text/
|
126
|
-
- lib/chupa-text/
|
124
|
+
- lib/chupa-text/extractor.rb
|
125
|
+
- lib/chupa-text/file-content.rb
|
126
|
+
- lib/chupa-text/formatters.rb
|
127
|
+
- lib/chupa-text/formatters/json.rb
|
128
|
+
- lib/chupa-text/formatters/text.rb
|
127
129
|
- lib/chupa-text/input-data.rb
|
130
|
+
- lib/chupa-text/loggable.rb
|
131
|
+
- lib/chupa-text/logger.rb
|
128
132
|
- lib/chupa-text/mime-type-registry.rb
|
129
|
-
- lib/chupa-text/
|
130
|
-
- lib/chupa-text/
|
131
|
-
- lib/chupa-text.rb
|
132
|
-
-
|
133
|
-
-
|
134
|
-
-
|
135
|
-
- doc/text/library.md
|
136
|
-
- test/test-decomposers.rb
|
137
|
-
- test/test-default-logger.rb
|
138
|
-
- test/test-decomposer.rb
|
139
|
-
- test/test-virtual-content.rb
|
133
|
+
- lib/chupa-text/mime-type.rb
|
134
|
+
- lib/chupa-text/size-parser.rb
|
135
|
+
- lib/chupa-text/text-data.rb
|
136
|
+
- lib/chupa-text/version.rb
|
137
|
+
- lib/chupa-text/virtual-content.rb
|
138
|
+
- lib/chupa-text/virtual-file-data.rb
|
140
139
|
- test/command/test-chupa-text.rb
|
141
|
-
- test/helper.rb
|
142
|
-
- test/test-configuration-loader.rb
|
143
|
-
- test/test-mime-type-registry.rb
|
144
|
-
- test/test-decomposer-registry.rb
|
145
|
-
- test/fixture/gzip/hello.txt.gz
|
146
|
-
- test/fixture/gzip/hello.tgz
|
147
|
-
- test/fixture/gzip/hello.tar.gz
|
148
|
-
- test/fixture/tar/top-level.tar
|
149
|
-
- test/fixture/tar/directory.tar
|
150
|
-
- test/fixture/command/chupa-text/hello.txt.gz
|
151
|
-
- test/fixture/command/chupa-text/no-decomposer.conf
|
152
|
-
- test/fixture/command/chupa-text/hello.txt
|
153
|
-
- test/fixture/extractor/hello.txt
|
154
|
-
- test/test-attributes.rb
|
155
|
-
- test/test-external-command.rb
|
156
|
-
- test/test-size-parser.rb
|
157
140
|
- test/decomposers/test-csv.rb
|
158
141
|
- test/decomposers/test-gzip.rb
|
159
142
|
- test/decomposers/test-tar.rb
|
160
143
|
- test/decomposers/test-xml.rb
|
144
|
+
- test/fixture/command/chupa-text/hello.txt
|
145
|
+
- test/fixture/command/chupa-text/hello.txt.gz
|
146
|
+
- test/fixture/command/chupa-text/no-decomposer.conf
|
147
|
+
- test/fixture/extractor/hello.txt
|
148
|
+
- test/fixture/gzip/hello.tar.gz
|
149
|
+
- test/fixture/gzip/hello.tgz
|
150
|
+
- test/fixture/gzip/hello.txt.gz
|
151
|
+
- test/fixture/tar/directory.tar
|
152
|
+
- test/fixture/tar/top-level.tar
|
153
|
+
- test/helper.rb
|
154
|
+
- test/run-test.rb
|
155
|
+
- test/test-attributes.rb
|
156
|
+
- test/test-configuration-loader.rb
|
161
157
|
- test/test-data.rb
|
162
|
-
- test/test-
|
158
|
+
- test/test-decomposer-registry.rb
|
159
|
+
- test/test-decomposer.rb
|
160
|
+
- test/test-decomposers.rb
|
161
|
+
- test/test-default-logger.rb
|
162
|
+
- test/test-external-command.rb
|
163
163
|
- test/test-extractor.rb
|
164
|
-
- test/
|
164
|
+
- test/test-file-content.rb
|
165
|
+
- test/test-mime-type-registry.rb
|
166
|
+
- test/test-size-parser.rb
|
165
167
|
- test/test-text-data.rb
|
166
|
-
-
|
167
|
-
- bin/chupa-text-generate-decomposer
|
168
|
+
- test/test-virtual-content.rb
|
168
169
|
homepage: http://ranguba.org/#about-chupa-text
|
169
170
|
licenses:
|
170
|
-
-
|
171
|
+
- LGPL-2.1+
|
171
172
|
metadata: {}
|
172
173
|
post_install_message:
|
173
174
|
rdoc_options: []
|
@@ -175,20 +176,19 @@ require_paths:
|
|
175
176
|
- lib
|
176
177
|
required_ruby_version: !ruby/object:Gem::Requirement
|
177
178
|
requirements:
|
178
|
-
- -
|
179
|
+
- - ">="
|
179
180
|
- !ruby/object:Gem::Version
|
180
181
|
version: '0'
|
181
182
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
182
183
|
requirements:
|
183
|
-
- -
|
184
|
+
- - ">="
|
184
185
|
- !ruby/object:Gem::Version
|
185
186
|
version: '0'
|
186
187
|
requirements: []
|
187
188
|
rubyforge_project:
|
188
|
-
rubygems_version: 2.
|
189
|
+
rubygems_version: 2.5.2
|
189
190
|
signing_key:
|
190
191
|
specification_version: 4
|
191
192
|
summary: ChupaText is an extensible text extractor. You can plug your custom text
|
192
193
|
extractor in ChupaText. You can write your plugin by Ruby.
|
193
194
|
test_files: []
|
194
|
-
has_rdoc:
|