chupa-text-decomposer-spreadsheet 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b8961f069a2e1fadf709458213bda6dcd9983434
4
+ data.tar.gz: a01f9efa53a115f5d263a3d1065ac25988d5f11a
5
+ SHA512:
6
+ metadata.gz: 7a7902e4a9c9897863dc6dd3c19ecde3c12eeafdbacd8ad5fbfd669d78f694f1a240ec89b3054820b061157c17fc61e9305ddd62107960eb9d71850060e66880
7
+ data.tar.gz: 5f8ae62e2904e58c9e4cbbc58afc35454f137d7ec538af940c568810fe9128721e441700eb4e998e9c77c8b65b2438981f2bd8ce0d7bb1fb34f1a2b50c859380
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org/"
2
+
3
+ gemspec
@@ -0,0 +1,4 @@
1
+ # chupa-text-decomposer-spreadsheet
2
+
3
+ A [Roo](https://github.com/roo-rb/roo)-based chupa-text decomposer for spreadsheets.
4
+
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task :default => :test
4
+
5
+ desc "Run tests"
6
+ task :test do
7
+ ruby("test/run-test.rb")
8
+ end
@@ -0,0 +1,23 @@
1
+ # -*- mode: ruby; coding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |spec|
4
+ spec.name = "chupa-text-decomposer-spreadsheet"
5
+ spec.version = "1.0.0"
6
+ spec.author = "Kenji Okimoto"
7
+ spec.email = "okimoto@clear-code.com"
8
+ spec.summary = "ChupaText decomposer for spreadsheet."
9
+ spec.description = spec.summary
10
+ spec.license = "MIT"
11
+ spec.files = ["#{spec.name}.gemspec"]
12
+ spec.files += Dir.glob("{README*,LICENSE*,Rakefile,Gemfile}")
13
+ spec.files += Dir.glob("lib/**/*.rb")
14
+ spec.files += Dir.glob("test/fixture/**/*")
15
+ spec.files += Dir.glob("test/**/*.rb")
16
+
17
+ spec.add_runtime_dependency("chupa-text")
18
+ spec.add_runtime_dependency("roo")
19
+
20
+ spec.add_development_dependency("bundler")
21
+ spec.add_development_dependency("rake")
22
+ spec.add_development_dependency("test-unit")
23
+ end
@@ -0,0 +1,40 @@
1
+ require "roo"
2
+ require "digest/sha1"
3
+
4
+ module ChupaText
5
+ module Decomposers
6
+ class Spreadsheet < Decomposer
7
+ registry.register("spreadsheet", self)
8
+
9
+ TARGET_EXTENSIONS = ["ods", "xls", "xlsx", "xlsm", "xml"]
10
+
11
+ TARGET_MIME_TYPES = [
12
+ "application/vnd.oasis.opendocument.spreadsheet",
13
+ "application/vnd.ms-excel",
14
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
15
+ ]
16
+
17
+ def target?(data)
18
+ TARGET_EXTENSIONS.include?(data.extension) || TARGET_MIME_TYPES.include?(data.mime_type)
19
+ end
20
+
21
+ def decompose(data)
22
+ book = Roo::Spreadsheet.open(data.uri)
23
+ book.sheets.each do |sheet_name|
24
+ sheet = book.sheet(sheet_name)
25
+ body = sheet.to_csv
26
+ text_data = TextData.new(body)
27
+ text_data["name"] = sheet_name
28
+ text_data["digest"] = Digest::SHA1.hexdigest(body)
29
+ text_data["size"] = body.bytesize
30
+ text_data["first-row"] = sheet.first_row
31
+ text_data["last-row"] = sheet.last_row
32
+ text_data["first-column"] = sheet.first_column && sheet.first_column_as_letter
33
+ text_data["last-column"] = sheet.last_column && sheet.last_column_as_letter
34
+ yield text_data
35
+ end
36
+ book.close
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,6 @@
1
+ module Helper
2
+ def fixture_path(*components)
3
+ base_dir = File.expand_path(__dir__)
4
+ File.join(base_dir, "fixture", *components)
5
+ end
6
+ end
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+
5
+ require "test-unit"
6
+
7
+ require "chupa-text"
8
+ ChupaText::Decomposers.load
9
+
10
+ require_relative "helper"
11
+
12
+ exit(Test::Unit::AutoRunner.run(true))
@@ -0,0 +1,28 @@
1
+ class TestSpreadsheet < Test::Unit::TestCase
2
+ include Helper
3
+
4
+ def setup
5
+ @decomposer = ChupaText::Decomposers::Spreadsheet.new({})
6
+ end
7
+
8
+ sub_test_case("decompose") do
9
+ def decompose(input_body)
10
+ data = ChupaText::Data.new
11
+ data.mime_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
12
+ data.body = input_body
13
+
14
+ decomposed = []
15
+ @decomposer.decompose(data) do |decomposed_data|
16
+ decomposed << decomposed_data
17
+ end
18
+ decomposed
19
+ end
20
+
21
+ def test_body
22
+ input_body = "TODO (input)"
23
+ expected_text = "TODO (extracted)"
24
+ assert_equal([expected_text],
25
+ decompose(input_body).collect(&:body))
26
+ end
27
+ end
28
+ end
metadata ADDED
@@ -0,0 +1,121 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: chupa-text-decomposer-spreadsheet
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Kenji Okimoto
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-04-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: chupa-text
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: roo
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: test-unit
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: ChupaText decomposer for spreadsheet.
84
+ email: okimoto@clear-code.com
85
+ executables: []
86
+ extensions: []
87
+ extra_rdoc_files: []
88
+ files:
89
+ - Gemfile
90
+ - README.md
91
+ - Rakefile
92
+ - chupa-text-decomposer-spreadsheet.gemspec
93
+ - lib/chupa-text/decomposers/spreadsheet.rb
94
+ - test/helper.rb
95
+ - test/run-test.rb
96
+ - test/test-spreadsheet.rb
97
+ homepage:
98
+ licenses:
99
+ - MIT
100
+ metadata: {}
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ requirements: []
116
+ rubyforge_project:
117
+ rubygems_version: 2.6.11
118
+ signing_key:
119
+ specification_version: 4
120
+ summary: ChupaText decomposer for spreadsheet.
121
+ test_files: []