chupa-text 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +7 -0
  2. data/.yardopts +5 -0
  3. data/Gemfile +21 -0
  4. data/LICENSE.txt +502 -0
  5. data/README.md +91 -0
  6. data/Rakefile +46 -0
  7. data/bin/chupa-text +21 -0
  8. data/bin/chupa-text-generate-decomposer +21 -0
  9. data/chupa-text.gemspec +58 -0
  10. data/data/chupa-text.conf +5 -0
  11. data/data/mime-types.conf +19 -0
  12. data/doc/text/command-line.md +136 -0
  13. data/doc/text/decomposer.md +343 -0
  14. data/doc/text/library.md +72 -0
  15. data/doc/text/news.md +5 -0
  16. data/lib/chupa-text.rb +37 -0
  17. data/lib/chupa-text/command.rb +18 -0
  18. data/lib/chupa-text/command/chupa-text-generate-decomposer.rb +324 -0
  19. data/lib/chupa-text/command/chupa-text.rb +102 -0
  20. data/lib/chupa-text/configuration-loader.rb +95 -0
  21. data/lib/chupa-text/configuration.rb +49 -0
  22. data/lib/chupa-text/data.rb +149 -0
  23. data/lib/chupa-text/decomposer-registry.rb +37 -0
  24. data/lib/chupa-text/decomposer.rb +37 -0
  25. data/lib/chupa-text/decomposers.rb +59 -0
  26. data/lib/chupa-text/decomposers/csv.rb +44 -0
  27. data/lib/chupa-text/decomposers/gzip.rb +51 -0
  28. data/lib/chupa-text/decomposers/tar.rb +42 -0
  29. data/lib/chupa-text/decomposers/xml.rb +55 -0
  30. data/lib/chupa-text/extractor.rb +91 -0
  31. data/lib/chupa-text/file-content.rb +35 -0
  32. data/lib/chupa-text/formatters.rb +17 -0
  33. data/lib/chupa-text/formatters/json.rb +60 -0
  34. data/lib/chupa-text/input-data.rb +58 -0
  35. data/lib/chupa-text/mime-type-registry.rb +41 -0
  36. data/lib/chupa-text/mime-type.rb +36 -0
  37. data/lib/chupa-text/text-data.rb +26 -0
  38. data/lib/chupa-text/version.rb +19 -0
  39. data/lib/chupa-text/virtual-content.rb +91 -0
  40. data/lib/chupa-text/virtual-file-data.rb +46 -0
  41. data/test/command/test-chupa-text.rb +178 -0
  42. data/test/decomposers/test-csv.rb +48 -0
  43. data/test/decomposers/test-gzip.rb +113 -0
  44. data/test/decomposers/test-tar.rb +78 -0
  45. data/test/decomposers/test-xml.rb +58 -0
  46. data/test/fixture/command/chupa-text/hello.txt +1 -0
  47. data/test/fixture/command/chupa-text/hello.txt.gz +0 -0
  48. data/test/fixture/command/chupa-text/no-decomposer.conf +3 -0
  49. data/test/fixture/extractor/hello.txt +1 -0
  50. data/test/fixture/gzip/hello.tar.gz +0 -0
  51. data/test/fixture/gzip/hello.tgz +0 -0
  52. data/test/fixture/gzip/hello.txt.gz +0 -0
  53. data/test/fixture/tar/directory.tar +0 -0
  54. data/test/fixture/tar/top-level.tar +0 -0
  55. data/test/helper.rb +25 -0
  56. data/test/run-test.rb +35 -0
  57. data/test/test-configuration-loader.rb +54 -0
  58. data/test/test-data.rb +85 -0
  59. data/test/test-decomposer-registry.rb +30 -0
  60. data/test/test-decomposer.rb +41 -0
  61. data/test/test-decomposers.rb +59 -0
  62. data/test/test-extractor.rb +125 -0
  63. data/test/test-file-content.rb +51 -0
  64. data/test/test-mime-type-registry.rb +48 -0
  65. data/test/test-text-data.rb +36 -0
  66. data/test/test-virtual-content.rb +103 -0
  67. metadata +183 -0
@@ -0,0 +1,48 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
# Exercises ChupaText::Decomposers::CSV through its #decompose method.
class TestDecomposersCSV < Test::Unit::TestCase
  include Helper

  # Builds the decomposer under test with an empty options hash.
  def setup
    @decomposer = ChupaText::Decomposers::CSV.new({})
  end

  sub_test_case("decompose") do
    # The single expected body is the CSV input with each comma replaced
    # by one space.
    def test_body
      # The heredoc body stays at column 0: `<<-` only lets the terminator
      # be indented, it does not strip leading whitespace from the content.
      csv = <<-CSV
Hello,World
Ruby,ChupaText
      CSV
      expected_bodies = [csv.gsub(/,/, " ")]
      actual_bodies = decompose(csv).collect(&:body)
      assert_equal(expected_bodies, actual_bodies)
    end

    private
    # Wraps +csv+ in a ChupaText::Data (path "hello.csv", MIME type
    # "text/csv") and returns every object the decomposer yields, in
    # yield order.
    def decompose(csv)
      data = ChupaText::Data.new
      data.path = "hello.csv"
      data.mime_type = "text/csv"
      data.body = csv

      results = []
      @decomposer.decompose(data) { |decomposed_data| results << decomposed_data }
      results
    end
  end
end
@@ -0,0 +1,113 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
# Tests for ChupaText::Decomposers::Gzip, driven by the files in the "gzip"
# fixture directory.
class TestDecomposersGzip < Test::Unit::TestCase
  include Helper

  # Builds the decomposer under test with an empty options hash.
  def setup
    @decomposer = ChupaText::Decomposers::Gzip.new({})
  end

  private
  # Resolves +components+ below the "gzip" fixture directory by prepending
  # "gzip" before delegating to Helper#fixture_path.
  def fixture_path(*components)
    super("gzip", *components)
  end

  sub_test_case("decompose") do
    # Runs the decomposer on +data+ and returns the yielded objects as an
    # array, in the order they were yielded.
    def decompose(data)
      decomposed = []
      @decomposer.decompose(data) do |decomposed_data|
        decomposed << decomposed_data
      end
      decomposed
    end

    sub_test_case("gz") do
      def setup
        super
        @data = ChupaText::InputData.new(fixture_path("hello.txt.gz"))
      end

      # Previously named test_path. Renamed because it asserts on #uri,
      # exactly like test_uri in the "tar.gz" and "tgz" cases below.
      # The expected URI is the fixture path without the ".gz" suffix.
      def test_uri
        assert_equal([URI.parse(fixture_path("hello.txt").to_s)],
                     decompose(@data).collect(&:uri))
      end

      def test_body
        assert_equal(["Hello\n"],
                     decompose(@data).collect(&:body))
      end

      # Each decomposed object must point back at the input data.
      def test_source
        assert_equal([@data],
                     decompose(@data).collect(&:source))
      end
    end

    sub_test_case("tar.gz") do
      def setup
        super
        @data = ChupaText::InputData.new(fixture_path("hello.tar.gz"))
      end

      # ".tar.gz" is expected to decompose to a ".tar" URI.
      def test_uri
        assert_equal([URI.parse(fixture_path("hello.tar").to_s)],
                     decompose(@data).collect(&:uri))
      end

      # Checks that the decompressed body looks like a tar archive by
      # reading tar_magic.bytesize bytes at offset 257 of each body.
      def test_body
        tar_magic = "ustar"
        magics = decompose(@data).collect do |decomposed|
          decomposed.body[257, tar_magic.bytesize]
        end
        assert_equal([tar_magic],
                     magics)
      end

      def test_source
        assert_equal([@data],
                     decompose(@data).collect(&:source))
      end
    end

    sub_test_case("tgz") do
      def setup
        super
        @data = ChupaText::InputData.new(fixture_path("hello.tgz"))
      end

      # ".tgz" is expected to decompose to a ".tar" URI as well.
      def test_uri
        assert_equal([URI.parse(fixture_path("hello.tar").to_s)],
                     decompose(@data).collect(&:uri))
      end

      # Same tar magic check as in the "tar.gz" case.
      def test_body
        tar_magic = "ustar"
        magics = decompose(@data).collect do |decomposed|
          decomposed.body[257, tar_magic.bytesize]
        end
        assert_equal([tar_magic],
                     magics)
      end

      def test_source
        assert_equal([@data],
                     decompose(@data).collect(&:source))
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
# Tests for ChupaText::Decomposers::Tar, driven by the archives in the "tar"
# fixture directory.
class TestDecomposersTar < Test::Unit::TestCase
  include Helper

  # Builds the decomposer under test with an empty options hash.
  def setup
    @decomposer = ChupaText::Decomposers::Tar.new({})
  end

  private
  # Resolves +components+ below the "tar" fixture directory by prepending
  # "tar" before delegating to Helper#fixture_path.
  def fixture_path(*components)
    super("tar", *components)
  end

  sub_test_case("decompose") do
    # Runs the decomposer on +data+ and summarises each yielded object as a
    # hash of plain strings (:uri, :body and the :source URI), so the tests
    # can compare against literal values.
    def decompose(data)
      entries = []
      @decomposer.decompose(data) do |entry|
        entries << {
          :uri    => entry.uri.to_s,
          :body   => entry.body,
          :source => entry.source.uri.to_s,
        }
      end
      entries
    end

    sub_test_case("top-level") do
      def setup
        super
        @data = ChupaText::InputData.new(fixture_path("top-level.tar"))
      end

      # The archive is expected to yield exactly one entry.
      def test_decompose
        expected = [
          {
            :uri    => "top-level.txt",
            :body   => "top level\n",
            :source => @data.uri.to_s,
          },
        ]
        assert_equal(expected, decompose(@data))
      end
    end

    sub_test_case("directory") do
      def setup
        super
        @data = ChupaText::InputData.new(fixture_path("directory.tar"))
      end

      # Only the file inside the directory is expected; no separate entry
      # for the directory itself appears in the expected list.
      def test_decompose
        expected = [
          {
            :uri    => "directory/hello.txt",
            :body   => "Hello in directory\n",
            :source => @data.uri.to_s,
          },
        ]
        assert_equal(expected, decompose(@data))
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
# Exercises ChupaText::Decomposers::XML through its #decompose method.
class TestDecomposersXML < Test::Unit::TestCase
  include Helper

  # Builds the decomposer under test with an empty options hash.
  def setup
    @decomposer = ChupaText::Decomposers::XML.new({})
  end

  sub_test_case("decompose") do
    # The expected text is the document with its tags removed and the
    # "&amp;" entity turned into "&"; line breaks are kept.
    def test_body
      # Heredoc bodies stay at column 0: `<<-` only lets the terminator be
      # indented, it does not strip leading whitespace from the content.
      xml = <<-XML
<root>
Hello
<sub-element attribute="value">&amp;</sub-element>
World
</root>
      XML
      text = <<-TEXT

Hello
&
World

      TEXT
      expected_bodies = [text]
      actual_bodies = decompose(xml).collect(&:body)
      assert_equal(expected_bodies, actual_bodies)
    end

    private
    # Wraps +xml+ in a ChupaText::Data (path "hello.xml", MIME type
    # "text/xml") and returns every object the decomposer yields, in
    # yield order.
    def decompose(xml)
      data = ChupaText::Data.new
      data.path = "hello.xml"
      data.mime_type = "text/xml"
      data.body = xml

      results = []
      @decomposer.decompose(data) { |decomposed_data| results << decomposed_data }
      results
    end
  end
end
@@ -0,0 +1,3 @@
1
+ # -*- ruby -*-
2
+
3
+ decomposer.names = []
@@ -0,0 +1 @@
1
+ Hello
@@ -0,0 +1,25 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ require "pathname"
18
+ require "tempfile"
19
+
20
# Helpers shared by the test cases that include this module.
module Helper
  # Builds a path below the "fixture" directory that sits next to this file.
  #
  # @param components [Array<String, Pathname>] path segments to append
  #   below the fixture directory.
  # @return [Pathname] the fixture directory joined with +components+.
  def fixture_path(*components)
    fixture_dir = Pathname.new(__FILE__).dirname + "fixture"
    fixture_dir.join(*components)
  end
end
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
4
+ #
5
+ # This library is free software; you can redistribute it and/or
6
+ # modify it under the terms of the GNU Lesser General Public
7
+ # License as published by the Free Software Foundation; either
8
+ # version 2.1 of the License, or (at your option) any later version.
9
+ #
10
+ # This library is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ # Lesser General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU Lesser General Public
16
+ # License along with this library; if not, write to the Free Software
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+
19
# Turn on Ruby warnings for everything loaded and run below.
$VERBOSE = true

require "pathname"

require "test-unit"

# Put the project's lib directory (a sibling of this file's directory) at
# the front of the load path so that "chupa-text" below resolves there first.
project_root = Pathname(__FILE__).dirname.parent
$LOAD_PATH.unshift((project_root + "lib").to_s)

require "chupa-text"

# NOTE(review): presumably makes the ChupaText::Decomposers::* classes
# available to the decomposer tests -- confirm against lib/chupa-text.
ChupaText::Decomposers.load

require_relative "helper"

# Hand the auto runner's result to exit so it becomes the process status.
exit(Test::Unit::AutoRunner.run(true))
@@ -0,0 +1,54 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
# Tests that ChupaText::ConfigurationLoader applies the content of a
# configuration file to a ChupaText::Configuration.
class TestConfiguration < Test::Unit::TestCase
  # Pairs a fresh configuration with a loader that writes into it.
  def setup
    @configuration = ChupaText::Configuration.new
    @loader = ChupaText::ConfigurationLoader.new(@configuration)
  end

  private
  # Writes +content+ to a temporary file, loads that file through the
  # loader, and returns the Tempfile.
  def load(content)
    Tempfile.new("chupa-text").tap do |file|
      file.print(content)
      # Flush so the loader, which reads by path, sees the content.
      file.flush
      @loader.load(file.path)
    end
  end

  sub_test_case("decomposer") do
    # Assigning decomposer.names in the file is visible on the configuration.
    def test_names
      source = <<-CONFIGURATION
decomposer.names = ["tar", "zip"]
      CONFIGURATION
      load(source)
      assert_equal(["tar", "zip"], @configuration.decomposer.names)
    end

    # Assigning decomposer.<name> in the file stores the hash in
    # decomposer.options under the string key "<name>".
    def test_option
      source = <<-CONFIGURATION
decomposer.tar = {
:omit_size => true
}
      CONFIGURATION
      load(source)
      expected_options = {
        "tar" => {
          :omit_size => true,
        },
      }
      assert_equal(expected_options, @configuration.decomposer.options)
    end
  end
end
@@ -0,0 +1,85 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
# Tests MIME type guessing and extension extraction on ChupaText::Data.
class TestData < Test::Unit::TestCase
  # Installs a fresh MIME type registry for each test and remembers the
  # previous one so that teardown can restore it.
  def setup
    @data = ChupaText::Data.new
    @registry = ChupaText::MIMETypeRegistry.new
    @original_registry = ChupaText::MIMEType.registry
    ChupaText::MIMEType.registry = @registry
  end

  # Puts back the registry that was active before setup ran.
  def teardown
    ChupaText::MIMEType.registry = @original_registry
  end

  sub_test_case("mime-type") do
    sub_test_case("guess") do
      sub_test_case("extension") do
        # With "txt" registered, a ".txt" URI is reported as text/plain.
        def test_txt
          ChupaText::MIMEType.registry.register("txt", "text/plain")
          guessed = guess("README.txt")
          assert_equal("text/plain", guessed)
        end

        private
        # Returns the MIME type reported for a dummy body at +uri+.
        def guess(uri)
          @data.body = "dummy"
          @data.uri = uri
          @data.mime_type
        end
      end

      sub_test_case("body") do
        # No extension is registered and no URI is set here, so the
        # expected text/plain has to be derived from the body itself.
        def test_txt
          binary_body = "Hello".dup.force_encoding("ASCII-8BIT")
          assert_equal("text/plain", guess(binary_body))
        end

        private
        # Returns the MIME type reported when only +body+ is set.
        def guess(body)
          @data.body = body
          @data.mime_type
        end
      end
    end
  end

  sub_test_case("extension") do
    def test_no_uri
      assert_nil(extension(nil))
    end

    def test_lower_case
      assert_equal("md", extension("README.md"))
    end

    # Upper-case and mixed-case extensions are expected in lower case.
    def test_upper_case
      assert_equal("md", extension("README.MD"))
    end

    def test_mixed_case
      assert_equal("md", extension("README.mD"))
    end

    private
    # Returns the extension reported for a dummy body at +uri+.
    def extension(uri)
      @data.body = "dummy"
      @data.uri = uri
      @data.extension
    end
  end
end