chupa-text 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67)
  1. checksums.yaml +7 -0
  2. data/.yardopts +5 -0
  3. data/Gemfile +21 -0
  4. data/LICENSE.txt +502 -0
  5. data/README.md +91 -0
  6. data/Rakefile +46 -0
  7. data/bin/chupa-text +21 -0
  8. data/bin/chupa-text-generate-decomposer +21 -0
  9. data/chupa-text.gemspec +58 -0
  10. data/data/chupa-text.conf +5 -0
  11. data/data/mime-types.conf +19 -0
  12. data/doc/text/command-line.md +136 -0
  13. data/doc/text/decomposer.md +343 -0
  14. data/doc/text/library.md +72 -0
  15. data/doc/text/news.md +5 -0
  16. data/lib/chupa-text.rb +37 -0
  17. data/lib/chupa-text/command.rb +18 -0
  18. data/lib/chupa-text/command/chupa-text-generate-decomposer.rb +324 -0
  19. data/lib/chupa-text/command/chupa-text.rb +102 -0
  20. data/lib/chupa-text/configuration-loader.rb +95 -0
  21. data/lib/chupa-text/configuration.rb +49 -0
  22. data/lib/chupa-text/data.rb +149 -0
  23. data/lib/chupa-text/decomposer-registry.rb +37 -0
  24. data/lib/chupa-text/decomposer.rb +37 -0
  25. data/lib/chupa-text/decomposers.rb +59 -0
  26. data/lib/chupa-text/decomposers/csv.rb +44 -0
  27. data/lib/chupa-text/decomposers/gzip.rb +51 -0
  28. data/lib/chupa-text/decomposers/tar.rb +42 -0
  29. data/lib/chupa-text/decomposers/xml.rb +55 -0
  30. data/lib/chupa-text/extractor.rb +91 -0
  31. data/lib/chupa-text/file-content.rb +35 -0
  32. data/lib/chupa-text/formatters.rb +17 -0
  33. data/lib/chupa-text/formatters/json.rb +60 -0
  34. data/lib/chupa-text/input-data.rb +58 -0
  35. data/lib/chupa-text/mime-type-registry.rb +41 -0
  36. data/lib/chupa-text/mime-type.rb +36 -0
  37. data/lib/chupa-text/text-data.rb +26 -0
  38. data/lib/chupa-text/version.rb +19 -0
  39. data/lib/chupa-text/virtual-content.rb +91 -0
  40. data/lib/chupa-text/virtual-file-data.rb +46 -0
  41. data/test/command/test-chupa-text.rb +178 -0
  42. data/test/decomposers/test-csv.rb +48 -0
  43. data/test/decomposers/test-gzip.rb +113 -0
  44. data/test/decomposers/test-tar.rb +78 -0
  45. data/test/decomposers/test-xml.rb +58 -0
  46. data/test/fixture/command/chupa-text/hello.txt +1 -0
  47. data/test/fixture/command/chupa-text/hello.txt.gz +0 -0
  48. data/test/fixture/command/chupa-text/no-decomposer.conf +3 -0
  49. data/test/fixture/extractor/hello.txt +1 -0
  50. data/test/fixture/gzip/hello.tar.gz +0 -0
  51. data/test/fixture/gzip/hello.tgz +0 -0
  52. data/test/fixture/gzip/hello.txt.gz +0 -0
  53. data/test/fixture/tar/directory.tar +0 -0
  54. data/test/fixture/tar/top-level.tar +0 -0
  55. data/test/helper.rb +25 -0
  56. data/test/run-test.rb +35 -0
  57. data/test/test-configuration-loader.rb +54 -0
  58. data/test/test-data.rb +85 -0
  59. data/test/test-decomposer-registry.rb +30 -0
  60. data/test/test-decomposer.rb +41 -0
  61. data/test/test-decomposers.rb +59 -0
  62. data/test/test-extractor.rb +125 -0
  63. data/test/test-file-content.rb +51 -0
  64. data/test/test-mime-type-registry.rb +48 -0
  65. data/test/test-text-data.rb +36 -0
  66. data/test/test-virtual-content.rb +103 -0
  67. metadata +183 -0
--- /dev/null
+++ data/test/decomposers/test-csv.rb
@@ -0,0 +1,48 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersCSV< Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ @decomposer = ChupaText::Decomposers::CSV.new({})
22
+ end
23
+
24
+ sub_test_case("decompose") do
25
+ def test_body
26
+ csv = <<-CSV
27
+ Hello,World
28
+ Ruby,ChupaText
29
+ CSV
30
+ assert_equal([csv.gsub(/,/, " ")],
31
+ decompose(csv).collect(&:body))
32
+ end
33
+
34
+ private
35
+ def decompose(csv)
36
+ data = ChupaText::Data.new
37
+ data.path = "hello.csv"
38
+ data.mime_type = "text/csv"
39
+ data.body = csv
40
+
41
+ decomposed = []
42
+ @decomposer.decompose(data) do |decomposed_data|
43
+ decomposed << decomposed_data
44
+ end
45
+ decomposed
46
+ end
47
+ end
48
+ end
--- /dev/null
+++ data/test/decomposers/test-gzip.rb
@@ -0,0 +1,113 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersGzip < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ @decomposer = ChupaText::Decomposers::Gzip.new({})
22
+ end
23
+
24
+ private
25
+ def fixture_path(*components)
26
+ super("gzip", *components)
27
+ end
28
+
29
+ sub_test_case("decompose") do
30
+ def decompose(data)
31
+ decomposed = []
32
+ @decomposer.decompose(data) do |decomposed_data|
33
+ decomposed << decomposed_data
34
+ end
35
+ decomposed
36
+ end
37
+
38
+ sub_test_case("gz") do
39
+ def setup
40
+ super
41
+ @data = ChupaText::InputData.new(fixture_path("hello.txt.gz"))
42
+ end
43
+
44
+ def test_path
45
+ assert_equal([URI.parse(fixture_path("hello.txt").to_s)],
46
+ decompose(@data).collect(&:uri))
47
+ end
48
+
49
+ def test_body
50
+ assert_equal(["Hello\n"],
51
+ decompose(@data).collect(&:body))
52
+ end
53
+
54
+ def test_source
55
+ assert_equal([@data],
56
+ decompose(@data).collect(&:source))
57
+ end
58
+ end
59
+
60
+ sub_test_case("tar.gz") do
61
+ def setup
62
+ super
63
+ @data = ChupaText::InputData.new(fixture_path("hello.tar.gz"))
64
+ end
65
+
66
+ def test_uri
67
+ assert_equal([URI.parse(fixture_path("hello.tar").to_s)],
68
+ decompose(@data).collect(&:uri))
69
+ end
70
+
71
+ def test_body
72
+ tar_magic = "ustar"
73
+ magics = decompose(@data).collect do |decomposed|
74
+ decomposed.body[257, tar_magic.bytesize]
75
+ end
76
+ assert_equal([tar_magic],
77
+ magics)
78
+ end
79
+
80
+ def test_source
81
+ assert_equal([@data],
82
+ decompose(@data).collect(&:source))
83
+ end
84
+ end
85
+
86
+
87
+ sub_test_case("tgz") do
88
+ def setup
89
+ super
90
+ @data = ChupaText::InputData.new(fixture_path("hello.tgz"))
91
+ end
92
+
93
+ def test_uri
94
+ assert_equal([URI.parse(fixture_path("hello.tar").to_s)],
95
+ decompose(@data).collect(&:uri))
96
+ end
97
+
98
+ def test_body
99
+ tar_magic = "ustar"
100
+ magics = decompose(@data).collect do |decomposed|
101
+ decomposed.body[257, tar_magic.bytesize]
102
+ end
103
+ assert_equal([tar_magic],
104
+ magics)
105
+ end
106
+
107
+ def test_source
108
+ assert_equal([@data],
109
+ decompose(@data).collect(&:source))
110
+ end
111
+ end
112
+ end
113
+ end
--- /dev/null
+++ data/test/decomposers/test-tar.rb
@@ -0,0 +1,78 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersTar < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ @decomposer = ChupaText::Decomposers::Tar.new({})
22
+ end
23
+
24
+ private
25
+ def fixture_path(*components)
26
+ super("tar", *components)
27
+ end
28
+
29
+ sub_test_case("decompose") do
30
+ def decompose(data)
31
+ decomposed = []
32
+ @decomposer.decompose(data) do |decomposed_data|
33
+ decomposed << {
34
+ :uri => decomposed_data.uri.to_s,
35
+ :body => decomposed_data.body,
36
+ :source => decomposed_data.source.uri.to_s,
37
+ }
38
+ end
39
+ decomposed
40
+ end
41
+
42
+ sub_test_case("top-level") do
43
+ def setup
44
+ super
45
+ @data = ChupaText::InputData.new(fixture_path("top-level.tar"))
46
+ end
47
+
48
+ def test_decompose
49
+ assert_equal([
50
+ {
51
+ :uri => "top-level.txt",
52
+ :body => "top level\n",
53
+ :source => @data.uri.to_s,
54
+ },
55
+ ],
56
+ decompose(@data))
57
+ end
58
+ end
59
+
60
+ sub_test_case("directory") do
61
+ def setup
62
+ super
63
+ @data = ChupaText::InputData.new(fixture_path("directory.tar"))
64
+ end
65
+
66
+ def test_decompose
67
+ assert_equal([
68
+ {
69
+ :uri => "directory/hello.txt",
70
+ :body => "Hello in directory\n",
71
+ :source => @data.uri.to_s,
72
+ },
73
+ ],
74
+ decompose(@data))
75
+ end
76
+ end
77
+ end
78
+ end
--- /dev/null
+++ data/test/decomposers/test-xml.rb
@@ -0,0 +1,58 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersXML < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ @decomposer = ChupaText::Decomposers::XML.new({})
22
+ end
23
+
24
+ sub_test_case("decompose") do
25
+ def test_body
26
+ xml = <<-XML
27
+ <root>
28
+ Hello
29
+ <sub-element attribute="value">&amp;</sub-element>
30
+ World
31
+ </root>
32
+ XML
33
+ text = <<-TEXT
34
+
35
+ Hello
36
+ &
37
+ World
38
+
39
+ TEXT
40
+ assert_equal([text],
41
+ decompose(xml).collect(&:body))
42
+ end
43
+
44
+ private
45
+ def decompose(xml)
46
+ data = ChupaText::Data.new
47
+ data.path = "hello.xml"
48
+ data.mime_type = "text/xml"
49
+ data.body = xml
50
+
51
+ decomposed = []
52
+ @decomposer.decompose(data) do |decomposed_data|
53
+ decomposed << decomposed_data
54
+ end
55
+ decomposed
56
+ end
57
+ end
58
+ end
--- /dev/null
+++ data/test/fixture/command/chupa-text/no-decomposer.conf
@@ -0,0 +1,3 @@
1
+ # -*- ruby -*-
2
+
3
+ decomposer.names = []
@@ -0,0 +1 @@
1
+ Hello
--- /dev/null
+++ data/test/helper.rb
@@ -0,0 +1,25 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ require "pathname"
18
+ require "tempfile"
19
+
20
+ module Helper
21
+ def fixture_path(*components)
22
+ base_path = Pathname(__FILE__).dirname + "fixture"
23
+ base_path.join(*components)
24
+ end
25
+ end
--- /dev/null
+++ data/test/run-test.rb
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
4
+ #
5
+ # This library is free software; you can redistribute it and/or
6
+ # modify it under the terms of the GNU Lesser General Public
7
+ # License as published by the Free Software Foundation; either
8
+ # version 2.1 of the License, or (at your option) any later version.
9
+ #
10
+ # This library is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ # Lesser General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU Lesser General Public
16
+ # License along with this library; if not, write to the Free Software
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+
19
+ $VERBOSE = true
20
+
21
+ require "pathname"
22
+
23
+ require "test-unit"
24
+
25
+ base_dir = Pathname(__FILE__).dirname.parent
26
+ lib_dir = base_dir + "lib"
27
+ $LOAD_PATH.unshift(lib_dir.to_s)
28
+
29
+ require "chupa-text"
30
+
31
+ ChupaText::Decomposers.load
32
+
33
+ require_relative "helper"
34
+
35
+ exit(Test::Unit::AutoRunner.run(true))
--- /dev/null
+++ data/test/test-configuration-loader.rb
@@ -0,0 +1,54 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestConfiguration < Test::Unit::TestCase
18
+ def setup
19
+ @configuration = ChupaText::Configuration.new
20
+ @loader = ChupaText::ConfigurationLoader.new(@configuration)
21
+ end
22
+
23
+ private
24
+ def load(content)
25
+ file = Tempfile.new("chupa-text")
26
+ file.print(content)
27
+ file.flush
28
+ @loader.load(file.path)
29
+ file
30
+ end
31
+
32
+ sub_test_case("decomposer") do
33
+ def test_names
34
+ load(<<-CONFIGURATION)
35
+ decomposer.names = ["tar", "zip"]
36
+ CONFIGURATION
37
+ assert_equal(["tar", "zip"], @configuration.decomposer.names)
38
+ end
39
+
40
+ def test_option
41
+ load(<<-CONFIGURATION)
42
+ decomposer.tar = {
43
+ :omit_size => true
44
+ }
45
+ CONFIGURATION
46
+ assert_equal({
47
+ "tar" => {
48
+ :omit_size => true,
49
+ },
50
+ },
51
+ @configuration.decomposer.options)
52
+ end
53
+ end
54
+ end
--- /dev/null
+++ data/test/test-data.rb
@@ -0,0 +1,85 @@
1
+ # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestData < Test::Unit::TestCase
18
+ def setup
19
+ @data = ChupaText::Data.new
20
+ @registry = ChupaText::MIMETypeRegistry.new
21
+ @original_registry = ChupaText::MIMEType.registry
22
+ ChupaText::MIMEType.registry = @registry
23
+ end
24
+
25
+ def teardown
26
+ ChupaText::MIMEType.registry = @original_registry
27
+ end
28
+
29
+ sub_test_case("mime-type") do
30
+ sub_test_case("guess") do
31
+ sub_test_case("extension") do
32
+ def test_txt
33
+ ChupaText::MIMEType.registry.register("txt", "text/plain")
34
+ assert_equal("text/plain", guess("README.txt"))
35
+ end
36
+
37
+ private
38
+ def guess(uri)
39
+ @data.body = "dummy"
40
+ @data.uri = uri
41
+ @data.mime_type
42
+ end
43
+ end
44
+
45
+ sub_test_case("body") do
46
+ def test_txt
47
+ body = "Hello"
48
+ body.force_encoding("ASCII-8BIT")
49
+ assert_equal("text/plain", guess(body))
50
+ end
51
+
52
+ private
53
+ def guess(body)
54
+ @data.body = body
55
+ @data.mime_type
56
+ end
57
+ end
58
+ end
59
+ end
60
+
61
+ sub_test_case("extension") do
62
+ def test_no_uri
63
+ assert_nil(extension(nil))
64
+ end
65
+
66
+ def test_lower_case
67
+ assert_equal("md", extension("README.md"))
68
+ end
69
+
70
+ def test_upper_case
71
+ assert_equal("md", extension("README.MD"))
72
+ end
73
+
74
+ def test_mixed_case
75
+ assert_equal("md", extension("README.mD"))
76
+ end
77
+
78
+ private
79
+ def extension(uri)
80
+ @data.body = "dummy"
81
+ @data.uri = uri
82
+ @data.extension
83
+ end
84
+ end
85
+ end