chupa-text 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b70738567326cdcde32a77574ef71c9981bb0ca2
4
- data.tar.gz: a7d5c2ffa050864b2d7448ded88dbda543695909
3
+ metadata.gz: c04261f7c3b4c26a0d67f18f473c8adf1d435080
4
+ data.tar.gz: 4e1a8da97abfa817226f919fcb672fb55d570d4d
5
5
  SHA512:
6
- metadata.gz: f64768ada9e980ee3ce8e052258ecce419c4474bb25a2b8ebee170c9e27510536412731c3587ca1eb4f5a027f3262104e636556074fcc95cf0f784550dca3c0e
7
- data.tar.gz: e3d862b6b5aa0ff30a49276327e1282fd2891a236b0822560f6c8f8aefbbfdf256ef27ad9ef7ba786e9c0120965f65c2f9d5a7a469cf98a17c7b5122bf8e1f50
6
+ metadata.gz: 00ada22d44d1f41ca4e2f08f02c43b41b193453ee33477dd02e9f5a9fbc0d87f5de46c53bc36a77b1494c2f8729a01e72419f9c9c9738d8819c04dde06c50d44
7
+ data.tar.gz: 7d404268095b308d4b7158e70458ac31d2dade5a0a131431559e84b34b563fb94454d5a3e900684dfe84da2642b382a14c55a479192992b141136a5f8659faf9
data/data/mime-types.conf CHANGED
@@ -1,19 +1,21 @@
1
1
  # -*- ruby -*-
2
2
 
3
- mime_type["txt"] = "text/plain"
3
+ mime_types["txt"] = "text/plain"
4
4
 
5
- mime_type["gz"] = "application/x-gzip"
6
- mime_type["tgz"] = "application/x-gtar-compressed"
5
+ mime_types["gz"] = "application/x-gzip"
6
+ mime_types["tgz"] = "application/x-gtar-compressed"
7
7
 
8
- mime_type["tar"] = "application/x-tar"
8
+ mime_types["tar"] = "application/x-tar"
9
9
 
10
- mime_type["htm"] = "text/html"
11
- mime_type["html"] = "text/html"
12
- mime_type["xhtml"] = "application/xhtml+xml"
10
+ mime_types["htm"] = "text/html"
11
+ mime_types["html"] = "text/html"
12
+ mime_types["xhtml"] = "application/xhtml+xml"
13
13
 
14
- mime_type["xml"] = "text/xml"
14
+ mime_types["xml"] = "text/xml"
15
15
 
16
- mime_type["css"] = "text/css"
16
+ mime_types["css"] = "text/css"
17
17
 
18
- mime_type["csv"] = "text/csv"
19
- mime_type["tsv"] = "text/tab-separated-values"
18
+ mime_types["csv"] = "text/csv"
19
+ mime_types["tsv"] = "text/tab-separated-values"
20
+
21
+ mime_types["pdf"] = "application/pdf"
@@ -123,14 +123,14 @@ command. You can use glob pattern for decomposer name such as
123
123
  The default is `["*"]`. It means that all installed decomposers are
124
124
  used.
125
125
 
126
- `mime_type["<extension>"] = "<MIME type>"`
126
+ `mime_types["<extension>"] = "<MIME type>"`
127
127
 
128
128
  It specifies a map to a MIME type from path extension.
129
129
 
130
130
  Here is an example that maps `"html"` to `"text/html"`:
131
131
 
132
132
  ```
133
- mime_type["html"] = "text/html"
133
+ mime_types["html"] = "text/html"
134
134
  ```
135
135
 
136
136
  The default configuration file registers popular MIME types.
data/doc/text/news.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # News
2
2
 
3
+ ## 1.0.1: 2014-01-05
4
+
5
+ * chupa-text: Supported loading decomposers installed by RubyGems.
6
+ * chupa-text: Added `--disable-gems` option that disables loading
7
+ decomposers installed by RubyGems.
8
+ * chupa-text: Added `-I` option to use decomposers that are not
9
+ installed by RubyGems.
10
+ * Added {ChupaText::Data#text_plain?}.
11
+ * configuration: Renamed `mime_type` to `mime_types` because it
12
+ handles a set of MIME types.
13
+ * configuration: Added PDF to the default MIME type mappings.
14
+
3
15
  ## 1.0.0: 2014-01-05
4
16
 
5
17
  The first release!!!
@@ -29,12 +29,13 @@ module ChupaText
29
29
  def initialize
30
30
  @input = nil
31
31
  @configuration = Configuration.default
32
+ @enable_gems = true
32
33
  end
33
34
 
34
35
  def run(*arguments)
35
36
  return false unless parse_arguments(arguments)
36
37
 
37
- Decomposers.load
38
+ load_decomposers
38
39
  extractor = create_extractor
39
40
  data = create_data
40
41
  formatter = create_formatter
@@ -77,9 +78,22 @@ module ChupaText
77
78
  "Read configuration from FILE.") do |path|
78
79
  load_configuration(path)
79
80
  end
81
+ parser.on("--disable-gems",
82
+ "Disable decomposers installed by RubyGems.") do
83
+ @enable_gems = false
84
+ end
85
+ parser.on("-I=PATH",
86
+ "Append PATH to decomposer load path.") do |path|
87
+ $LOAD_PATH << path
88
+ end
80
89
  parser
81
90
  end
82
91
 
92
+ def load_decomposers
93
+ Decomposers.enable_all_gems if @enable_gems
94
+ Decomposers.load
95
+ end
96
+
83
97
  def create_extractor
84
98
  extractor = Extractor.new
85
99
  extractor.apply_configuration(@configuration)
@@ -19,11 +19,11 @@ require "pathname"
19
19
  module ChupaText
20
20
  class ConfigurationLoader
21
21
  attr_reader :decomposer
22
- attr_reader :mime_type
22
+ attr_reader :mime_types
23
23
  def initialize(configuration)
24
24
  @configuration = configuration
25
25
  @decomposer = DecomposerLoader.new(@configuration.decomposer)
26
- @mime_type = MIMETypeLoader.new(@configuration.mime_type_registry)
26
+ @mime_types = MIMETypesLoader.new(@configuration.mime_type_registry)
27
27
  @load_paths = []
28
28
  data_dir = File.join(File.dirname(__FILE__), "..", "..", "data")
29
29
  @load_paths << File.expand_path(data_dir)
@@ -82,7 +82,7 @@ module ChupaText
82
82
  end
83
83
  end
84
84
 
85
- class MIMETypeLoader
85
+ class MIMETypesLoader
86
86
  def initialize(registry)
87
87
  @registry = registry
88
88
  end
@@ -117,6 +117,12 @@ module ChupaText
117
117
  (mime_type || "").start_with?("text/")
118
118
  end
119
119
 
120
+ # @return [Bool] true if MIME type is "text/plain", false
121
+ # otherwise.
122
+ def text_plain?
123
+ mime_type == "text/plain"
124
+ end
125
+
120
126
  private
121
127
  def guess_mime_type
122
128
  guess_mime_type_from_uri or
@@ -17,6 +17,20 @@
17
17
  module ChupaText
18
18
  module Decomposers
19
19
  class << self
20
+ def enable_all_gems
21
+ decomposer_specs = Gem::Specification.find_all do |spec|
22
+ spec.name.start_with?("chupa-text-decomposer-")
23
+ end
24
+ grouped_decomposer_specs = decomposer_specs.group_by(&:name)
25
+ latest_decomposer_specs = []
26
+ grouped_decomposer_specs.each do |name, specs|
27
+ latest_decomposer_specs << specs.sort_by(&:version).last
28
+ end
29
+ latest_decomposer_specs.each do |spec|
30
+ gem(spec.name, spec.version)
31
+ end
32
+ end
33
+
20
34
  def load
21
35
  paths = []
22
36
  $LOAD_PATH.each do |load_path|
@@ -59,6 +59,10 @@ module ChupaText
59
59
  targets = [ensure_data(input)]
60
60
  until targets.empty?
61
61
  target = targets.pop
62
+ if target.text_plain?
63
+ yield(target)
64
+ next
65
+ end
62
66
  decomposer = find_decomposer(target)
63
67
  if decomposer.nil?
64
68
  yield(target) if target.text?
@@ -72,13 +76,10 @@ module ChupaText
72
76
 
73
77
  private
74
78
  def ensure_data(input)
75
- case input
76
- when String, Pathname, URI::Generic
77
- data = Data.new
78
- data.uri = input.to_s
79
- data
80
- else
79
+ if input.is_a?(Data)
81
80
  input
81
+ else
82
+ InputData.new(input)
82
83
  end
83
84
  end
84
85
 
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.0.0"
18
+ VERSION = "1.0.1"
19
19
  end
@@ -44,7 +44,7 @@ class TestCommandChupaText < Test::Unit::TestCase
44
44
 
45
45
  def run_command(*arguments)
46
46
  succeeded = wrap_io do
47
- ChupaText::Command::ChupaText.run(*arguments)
47
+ ChupaText::Command::ChupaText.run("--disable-gems", *arguments)
48
48
  end
49
49
  [succeeded, JSON.parse(@stdout.string)]
50
50
  end
data/test/test-data.rb CHANGED
@@ -56,6 +56,40 @@ class TestData < Test::Unit::TestCase
56
56
  end
57
57
  end
58
58
  end
59
+
60
+ sub_test_case("text?") do
61
+ def test_text_plain
62
+ @data.mime_type = "text/plain"
63
+ assert_true(@data.text?)
64
+ end
65
+
66
+ def test_text_html
67
+ @data.mime_type = "text/html"
68
+ assert_true(@data.text?)
69
+ end
70
+
71
+ def test_application_xhtml_xml
72
+ @data.mime_type = "application/xhtml+xml"
73
+ assert_false(@data.text?)
74
+ end
75
+ end
76
+
77
+ sub_test_case("text_plain?") do
78
+ def test_text_plain
79
+ @data.mime_type = "text/plain"
80
+ assert_true(@data.text_plain?)
81
+ end
82
+
83
+ def test_text_html
84
+ @data.mime_type = "text/html"
85
+ assert_false(@data.text_plain?)
86
+ end
87
+
88
+ def test_application_xhtml_xml
89
+ @data.mime_type = "application/xhtml+xml"
90
+ assert_false(@data.text_plain?)
91
+ end
92
+ end
59
93
  end
60
94
 
61
95
  sub_test_case("extension") do
@@ -97,12 +97,12 @@ class TestExtractor < Test::Unit::TestCase
97
97
  sub_test_case("multi decomposed") do
98
98
  class CopyDecomposer < ChupaText::Decomposer
99
99
  def target?(data)
100
- data["copied"].nil?
100
+ data.mime_type == "text/x-plain"
101
101
  end
102
102
 
103
103
  def decompose(data)
104
104
  copied_data = data.dup
105
- copied_data["copied"] = true
105
+ copied_data.mime_type = "text/plain"
106
106
  yield(copied_data.dup)
107
107
  yield(copied_data.dup)
108
108
  end
@@ -116,7 +116,7 @@ class TestExtractor < Test::Unit::TestCase
116
116
 
117
117
  def test_decompose
118
118
  data = ChupaText::Data.new
119
- data.mime_type = "text/plain"
119
+ data.mime_type = "text/x-plain"
120
120
  data.body = "Hello"
121
121
  assert_equal(["Hello", "Hello"], extract(data))
122
122
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-04 00:00:00.000000000 Z
11
+ date: 2014-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler