chupa-text-decomposer-spreadsheet 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/chupa-text-decomposer-spreadsheet.gemspec +1 -1
- data/doc/text/news.md +6 -0
- data/lib/chupa-text/decomposers/spreadsheet.rb +42 -14
- data/test/fixture/xls/broken.xls +1 -0
- data/test/helper.rb +29 -0
- data/test/test-spreadsheet.rb +14 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b50a60537eb1f3f662b6f71da4ea8d29a85301d593b9dfbac4da2211c73017d4
|
4
|
+
data.tar.gz: e8ebf3da90b055a6a987409466d02013105fc3e48fb3eb5044a2fe1e81d41807
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eb709a20b9b5205653507304506b36e86e321def5d7846113822396121a50f32b61739ce88cab30573a8593bca02715e3fee57d2a588ee26e9fc96116c2fd2ea
|
7
|
+
data.tar.gz: '04315366589a40e543c6ef17808c7d902b135924bea0a5ce1b8df0b4bdf2be349a30c23eb30c9bc4391ea0e21abaaabd9982f64507e178bd294b8609776917ae'
|
data/lib/chupa-text/decomposers/spreadsheet.rb
CHANGED
@@ -5,6 +5,8 @@ require "digest/sha1"
|
|
5
5
|
module ChupaText
|
6
6
|
module Decomposers
|
7
7
|
class Spreadsheet < Decomposer
|
8
|
+
include Loggable
|
9
|
+
|
8
10
|
registry.register("spreadsheet", self)
|
9
11
|
|
10
12
|
TARGET_EXTENSIONS = ["ods", "xls", "xlsx", "xlsm", "xml"]
|
@@ -29,21 +31,47 @@ module ChupaText
|
|
29
31
|
end
|
30
32
|
|
31
33
|
def decompose(data)
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
34
|
+
open_book(data) do |book|
|
35
|
+
book.sheets.each do |sheet_name|
|
36
|
+
sheet = book.sheet(sheet_name)
|
37
|
+
body = sheet.to_csv
|
38
|
+
text_data = TextData.new(body, source_data: data)
|
39
|
+
text_data["name"] = sheet_name
|
40
|
+
text_data["digest"] = Digest::SHA1.hexdigest(body)
|
41
|
+
text_data["size"] = body.bytesize
|
42
|
+
text_data["first-row"] = sheet.first_row
|
43
|
+
text_data["last-row"] = sheet.last_row
|
44
|
+
text_data["first-column"] = sheet.first_column && sheet.first_column_as_letter
|
45
|
+
text_data["last-column"] = sheet.last_column && sheet.last_column_as_letter
|
46
|
+
yield text_data
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
def open_book(data)
|
53
|
+
book = nil
|
54
|
+
begin
|
55
|
+
book = Roo::Spreadsheet.open(data.path.to_s)
|
56
|
+
rescue Ole::Storage::FormatError => format_error
|
57
|
+
error do
|
58
|
+
message = "#{log_tag} Invalid format: "
|
59
|
+
message << "#{format_error.class}: #{format_error.message}\n"
|
60
|
+
message << format_error.backtrace.join("\n")
|
61
|
+
message
|
62
|
+
end
|
63
|
+
return
|
45
64
|
end
|
46
|
-
|
65
|
+
|
66
|
+
begin
|
67
|
+
yield(book)
|
68
|
+
ensure
|
69
|
+
book.close
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def log_tag
|
74
|
+
"[decomposer][spreadsheet]"
|
47
75
|
end
|
48
76
|
end
|
49
77
|
end
|
data/test/fixture/xls/broken.xls
@@ -0,0 +1 @@
|
|
1
|
+
Broken
|
data/test/helper.rb
CHANGED
@@ -3,4 +3,33 @@ module Helper
|
|
3
3
|
base_dir = File.expand_path(__dir__)
|
4
4
|
File.join(base_dir, "fixture", *components)
|
5
5
|
end
|
6
|
+
|
7
|
+
class CaptureLogger
|
8
|
+
def initialize(output)
|
9
|
+
@output = output
|
10
|
+
end
|
11
|
+
|
12
|
+
def error(message=nil)
|
13
|
+
@output << [:error, message || yield]
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def capture_log
|
18
|
+
original_logger = ChupaText.logger
|
19
|
+
begin
|
20
|
+
output = []
|
21
|
+
ChupaText.logger = CaptureLogger.new(output)
|
22
|
+
yield
|
23
|
+
normalize_log(output)
|
24
|
+
ensure
|
25
|
+
ChupaText.logger = original_logger
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def normalize_log(log)
|
30
|
+
log.collect do |level, message|
|
31
|
+
message = message.split("\n", 2)[0]
|
32
|
+
[level, message]
|
33
|
+
end
|
34
|
+
end
|
6
35
|
end
|
data/test/test-spreadsheet.rb
CHANGED
@@ -45,6 +45,20 @@ class TestSpreadsheet < Test::Unit::TestCase
|
|
45
45
|
decompose("xls/multi-sheets.xls"))
|
46
46
|
end
|
47
47
|
|
48
|
+
def test_xls_broken
|
49
|
+
log = capture_log do
|
50
|
+
assert_equal([], decompose("xls/broken.xls"))
|
51
|
+
end
|
52
|
+
assert_equal([
|
53
|
+
[
|
54
|
+
:error,
|
55
|
+
"[decomposer][spreadsheet] Invalid format: " +
|
56
|
+
"Ole::Storage::FormatError: OLE2 signature is invalid"
|
57
|
+
],
|
58
|
+
],
|
59
|
+
log)
|
60
|
+
end
|
61
|
+
|
48
62
|
def test_xlsx
|
49
63
|
assert_equal([<<-SHEET1, <<-SHEET2, <<-SHEET3],
|
50
64
|
"Sheet1 - A1","Sheet1 - B1"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text-decomposer-spreadsheet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kenji Okimoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: chupa-text
|
@@ -108,6 +108,7 @@ files:
|
|
108
108
|
- doc/text/news.md
|
109
109
|
- lib/chupa-text/decomposers/spreadsheet.rb
|
110
110
|
- test/fixture/ods/multi-sheets.ods
|
111
|
+
- test/fixture/xls/broken.xls
|
111
112
|
- test/fixture/xls/multi-sheets.xls
|
112
113
|
- test/fixture/xlsx/multi-sheets.xlsx
|
113
114
|
- test/helper.rb
|