chupa-text 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b70738567326cdcde32a77574ef71c9981bb0ca2
4
- data.tar.gz: a7d5c2ffa050864b2d7448ded88dbda543695909
3
+ metadata.gz: c04261f7c3b4c26a0d67f18f473c8adf1d435080
4
+ data.tar.gz: 4e1a8da97abfa817226f919fcb672fb55d570d4d
5
5
  SHA512:
6
- metadata.gz: f64768ada9e980ee3ce8e052258ecce419c4474bb25a2b8ebee170c9e27510536412731c3587ca1eb4f5a027f3262104e636556074fcc95cf0f784550dca3c0e
7
- data.tar.gz: e3d862b6b5aa0ff30a49276327e1282fd2891a236b0822560f6c8f8aefbbfdf256ef27ad9ef7ba786e9c0120965f65c2f9d5a7a469cf98a17c7b5122bf8e1f50
6
+ metadata.gz: 00ada22d44d1f41ca4e2f08f02c43b41b193453ee33477dd02e9f5a9fbc0d87f5de46c53bc36a77b1494c2f8729a01e72419f9c9c9738d8819c04dde06c50d44
7
+ data.tar.gz: 7d404268095b308d4b7158e70458ac31d2dade5a0a131431559e84b34b563fb94454d5a3e900684dfe84da2642b382a14c55a479192992b141136a5f8659faf9
data/data/mime-types.conf CHANGED
@@ -1,19 +1,21 @@
1
1
  # -*- ruby -*-
2
2
 
3
- mime_type["txt"] = "text/plain"
3
+ mime_types["txt"] = "text/plain"
4
4
 
5
- mime_type["gz"] = "application/x-gzip"
6
- mime_type["tgz"] = "application/x-gtar-compressed"
5
+ mime_types["gz"] = "application/x-gzip"
6
+ mime_types["tgz"] = "application/x-gtar-compressed"
7
7
 
8
- mime_type["tar"] = "application/x-tar"
8
+ mime_types["tar"] = "application/x-tar"
9
9
 
10
- mime_type["htm"] = "text/html"
11
- mime_type["html"] = "text/html"
12
- mime_type["xhtml"] = "application/xhtml+xml"
10
+ mime_types["htm"] = "text/html"
11
+ mime_types["html"] = "text/html"
12
+ mime_types["xhtml"] = "application/xhtml+xml"
13
13
 
14
- mime_type["xml"] = "text/xml"
14
+ mime_types["xml"] = "text/xml"
15
15
 
16
- mime_type["css"] = "text/css"
16
+ mime_types["css"] = "text/css"
17
17
 
18
- mime_type["csv"] = "text/csv"
19
- mime_type["tsv"] = "text/tab-separated-values"
18
+ mime_types["csv"] = "text/csv"
19
+ mime_types["tsv"] = "text/tab-separated-values"
20
+
21
+ mime_types["pdf"] = "application/pdf"
@@ -123,14 +123,14 @@ command. You can use glob pattern for decomposer name such as
123
123
  The default is `["*"]`. It means that all installed decomposers are
124
124
  used.
125
125
 
126
- `mime_type["<extension>"] = "<MIME type>"`
126
+ `mime_types["<extension>"] = "<MIME type>"`
127
127
 
128
128
  It specifies a mapping from a path extension to a MIME type.
129
129
 
130
130
  Here is an example that maps `"html"` to `"text/html"`:
131
131
 
132
132
  ```
133
- mime_type["html"] = "text/html"
133
+ mime_types["html"] = "text/html"
134
134
  ```
135
135
 
136
136
  The default configuration file registers popular MIME types.
data/doc/text/news.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # News
2
2
 
3
+ ## 1.0.1: 2014-01-05
4
+
5
+ * chupa-text: Supported loading decomposers installed by RubyGems.
6
+ * chupa-text: Added `--disable-gems` option that disables loading
7
+ decomposers installed by RubyGems.
8
+ * chupa-text: Added `-I` option to use decomposers that are not
9
+ installed by RubyGems.
10
+ * Added {ChupaText::Data#text_plain?}.
11
+ * configuration: Renamed `mime_type` to `mime_types` because it
12
+ handles a set of MIME types.
13
+ * configuration: Added PDF to the default MIME type mappings.
14
+
3
15
  ## 1.0.0: 2014-01-05
4
16
 
5
17
  The first release!!!
@@ -29,12 +29,13 @@ module ChupaText
29
29
  def initialize
30
30
  @input = nil
31
31
  @configuration = Configuration.default
32
+ @enable_gems = true
32
33
  end
33
34
 
34
35
  def run(*arguments)
35
36
  return false unless parse_arguments(arguments)
36
37
 
37
- Decomposers.load
38
+ load_decomposers
38
39
  extractor = create_extractor
39
40
  data = create_data
40
41
  formatter = create_formatter
@@ -77,9 +78,22 @@ module ChupaText
77
78
  "Read configuration from FILE.") do |path|
78
79
  load_configuration(path)
79
80
  end
81
+ parser.on("--disable-gems",
82
+ "Disable decomposers installed by RubyGems.") do
83
+ @enable_gems = false
84
+ end
85
+ parser.on("-I=PATH",
86
+ "Append PATH to decomposer load path.") do |path|
87
+ $LOAD_PATH << path
88
+ end
80
89
  parser
81
90
  end
82
91
 
92
+ def load_decomposers
93
+ Decomposers.enable_all_gems if @enable_gems
94
+ Decomposers.load
95
+ end
96
+
83
97
  def create_extractor
84
98
  extractor = Extractor.new
85
99
  extractor.apply_configuration(@configuration)
@@ -19,11 +19,11 @@ require "pathname"
19
19
  module ChupaText
20
20
  class ConfigurationLoader
21
21
  attr_reader :decomposer
22
- attr_reader :mime_type
22
+ attr_reader :mime_types
23
23
  def initialize(configuration)
24
24
  @configuration = configuration
25
25
  @decomposer = DecomposerLoader.new(@configuration.decomposer)
26
- @mime_type = MIMETypeLoader.new(@configuration.mime_type_registry)
26
+ @mime_types = MIMETypesLoader.new(@configuration.mime_type_registry)
27
27
  @load_paths = []
28
28
  data_dir = File.join(File.dirname(__FILE__), "..", "..", "data")
29
29
  @load_paths << File.expand_path(data_dir)
@@ -82,7 +82,7 @@ module ChupaText
82
82
  end
83
83
  end
84
84
 
85
- class MIMETypeLoader
85
+ class MIMETypesLoader
86
86
  def initialize(registry)
87
87
  @registry = registry
88
88
  end
@@ -117,6 +117,12 @@ module ChupaText
117
117
  (mime_type || "").start_with?("text/")
118
118
  end
119
119
 
120
+ # @return [Bool] true if MIME type is "text/plain", false
121
+ # otherwise.
122
+ def text_plain?
123
+ mime_type == "text/plain"
124
+ end
125
+
120
126
  private
121
127
  def guess_mime_type
122
128
  guess_mime_type_from_uri or
@@ -17,6 +17,20 @@
17
17
  module ChupaText
18
18
  module Decomposers
19
19
  class << self
20
+ def enable_all_gems
21
+ decomposer_specs = Gem::Specification.find_all do |spec|
22
+ spec.name.start_with?("chupa-text-decomposer-")
23
+ end
24
+ grouped_decomposer_specs = decomposer_specs.group_by(&:name)
25
+ latest_decomposer_specs = []
26
+ grouped_decomposer_specs.each do |name, specs|
27
+ latest_decomposer_specs << specs.sort_by(&:version).last
28
+ end
29
+ latest_decomposer_specs.each do |spec|
30
+ gem(spec.name, spec.version)
31
+ end
32
+ end
33
+
20
34
  def load
21
35
  paths = []
22
36
  $LOAD_PATH.each do |load_path|
@@ -59,6 +59,10 @@ module ChupaText
59
59
  targets = [ensure_data(input)]
60
60
  until targets.empty?
61
61
  target = targets.pop
62
+ if target.text_plain?
63
+ yield(target)
64
+ next
65
+ end
62
66
  decomposer = find_decomposer(target)
63
67
  if decomposer.nil?
64
68
  yield(target) if target.text?
@@ -72,13 +76,10 @@ module ChupaText
72
76
 
73
77
  private
74
78
  def ensure_data(input)
75
- case input
76
- when String, Pathname, URI::Generic
77
- data = Data.new
78
- data.uri = input.to_s
79
- data
80
- else
79
+ if input.is_a?(Data)
81
80
  input
81
+ else
82
+ InputData.new(input)
82
83
  end
83
84
  end
84
85
 
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.0.0"
18
+ VERSION = "1.0.1"
19
19
  end
@@ -44,7 +44,7 @@ class TestCommandChupaText < Test::Unit::TestCase
44
44
 
45
45
  def run_command(*arguments)
46
46
  succeeded = wrap_io do
47
- ChupaText::Command::ChupaText.run(*arguments)
47
+ ChupaText::Command::ChupaText.run("--disable-gems", *arguments)
48
48
  end
49
49
  [succeeded, JSON.parse(@stdout.string)]
50
50
  end
data/test/test-data.rb CHANGED
@@ -56,6 +56,40 @@ class TestData < Test::Unit::TestCase
56
56
  end
57
57
  end
58
58
  end
59
+
60
+ sub_test_case("text?") do
61
+ def test_text_plain
62
+ @data.mime_type = "text/plain"
63
+ assert_true(@data.text?)
64
+ end
65
+
66
+ def test_text_html
67
+ @data.mime_type = "text/html"
68
+ assert_true(@data.text?)
69
+ end
70
+
71
+ def test_application_xhtml_xml
72
+ @data.mime_type = "application/xhtml+xml"
73
+ assert_false(@data.text?)
74
+ end
75
+ end
76
+
77
+ sub_test_case("text_plain?") do
78
+ def test_text_plain
79
+ @data.mime_type = "text/plain"
80
+ assert_true(@data.text_plain?)
81
+ end
82
+
83
+ def test_text_html
84
+ @data.mime_type = "text/html"
85
+ assert_false(@data.text_plain?)
86
+ end
87
+
88
+ def test_application_xhtml_xml
89
+ @data.mime_type = "application/xhtml+xml"
90
+ assert_false(@data.text_plain?)
91
+ end
92
+ end
59
93
  end
60
94
 
61
95
  sub_test_case("extension") do
@@ -97,12 +97,12 @@ class TestExtractor < Test::Unit::TestCase
97
97
  sub_test_case("multi decomposed") do
98
98
  class CopyDecomposer < ChupaText::Decomposer
99
99
  def target?(data)
100
- data["copied"].nil?
100
+ data.mime_type == "text/x-plain"
101
101
  end
102
102
 
103
103
  def decompose(data)
104
104
  copied_data = data.dup
105
- copied_data["copied"] = true
105
+ copied_data.mime_type = "text/plain"
106
106
  yield(copied_data.dup)
107
107
  yield(copied_data.dup)
108
108
  end
@@ -116,7 +116,7 @@ class TestExtractor < Test::Unit::TestCase
116
116
 
117
117
  def test_decompose
118
118
  data = ChupaText::Data.new
119
- data.mime_type = "text/plain"
119
+ data.mime_type = "text/x-plain"
120
120
  data.body = "Hello"
121
121
  assert_equal(["Hello", "Hello"], extract(data))
122
122
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-04 00:00:00.000000000 Z
11
+ date: 2014-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler