rbxl 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +85 -24
- data/Rakefile +6 -0
- data/ext/rbxl_native/extconf.rb +23 -34
- data/ext/rbxl_native/native.c +127 -7
- data/lib/rbxl/cell.rb +15 -0
- data/lib/rbxl/empty_cell.rb +15 -0
- data/lib/rbxl/errors.rb +29 -0
- data/lib/rbxl/native.rb +16 -1
- data/lib/rbxl/read_only_cell.rb +10 -0
- data/lib/rbxl/read_only_workbook.rb +83 -6
- data/lib/rbxl/read_only_worksheet.rb +119 -7
- data/lib/rbxl/row.rb +34 -1
- data/lib/rbxl/version.rb +2 -1
- data/lib/rbxl/write_only_cell.rb +19 -1
- data/lib/rbxl/write_only_workbook.rb +57 -9
- data/lib/rbxl/write_only_worksheet.rb +41 -0
- data/lib/rbxl.rb +96 -2
- data/sig/rbxl.rbs +128 -0
- metadata +7 -3
data/lib/rbxl.rb
CHANGED
|
@@ -16,14 +16,108 @@ require_relative "rbxl/write_only_cell"
|
|
|
16
16
|
require_relative "rbxl/write_only_workbook"
|
|
17
17
|
require_relative "rbxl/write_only_worksheet"
|
|
18
18
|
|
|
19
|
+
# Minimal, memory-friendly XLSX reader/writer inspired by +openpyxl+.
|
|
20
|
+
#
|
|
21
|
+
# Rbxl exposes two explicit, non-overlapping modes:
|
|
22
|
+
#
|
|
23
|
+
# * {Rbxl.open} returns a {Rbxl::ReadOnlyWorkbook} for row-by-row reads
|
|
24
|
+
# * {Rbxl.new} returns a {Rbxl::WriteOnlyWorkbook} for one-shot writes
|
|
25
|
+
#
|
|
26
|
+
# The API is intentionally narrow so that memory usage stays predictable
|
|
27
|
+
# for large workbooks. Neither mode materializes the full workbook in
|
|
28
|
+
# memory; reads pull rows from the underlying XML one at a time, and writes
|
|
29
|
+
# accumulate only the rows added before {Rbxl::WriteOnlyWorkbook#save}.
|
|
30
|
+
#
|
|
31
|
+
# == Reading
|
|
32
|
+
#
|
|
33
|
+
# require "rbxl"
|
|
34
|
+
#
|
|
35
|
+
# book = Rbxl.open("report.xlsx", read_only: true)
|
|
36
|
+
# sheet = book.sheet("Report")
|
|
37
|
+
# sheet.each_row(values_only: true) { |values| p values }
|
|
38
|
+
# book.close
|
|
39
|
+
#
|
|
40
|
+
# == Writing
|
|
41
|
+
#
|
|
42
|
+
# require "rbxl"
|
|
43
|
+
#
|
|
44
|
+
# book = Rbxl.new(write_only: true)
|
|
45
|
+
# sheet = book.add_sheet("Report")
|
|
46
|
+
# sheet << ["id", "name", "score"]
|
|
47
|
+
# sheet << [1, "alice", 100]
|
|
48
|
+
# book.save("report.xlsx")
|
|
49
|
+
#
|
|
50
|
+
# == Native extension
|
|
51
|
+
#
|
|
52
|
+
# Requiring <tt>"rbxl/native"</tt> after <tt>"rbxl"</tt> swaps the hot
|
|
53
|
+
# worksheet XML paths for a libxml2-backed C implementation with the same
|
|
54
|
+
# observable behavior. See the README for build requirements.
|
|
19
55
|
module Rbxl
|
|
56
|
+
# Maximum number of shared strings accepted from a workbook's
|
|
57
|
+
# +xl/sharedStrings.xml+ entry. Defaults to 10 million, which comfortably
|
|
58
|
+
# covers real-world enterprise workbooks while rejecting files crafted to
|
|
59
|
+
# exhaust memory before any row is read. Set to +nil+ to disable.
|
|
60
|
+
@max_shared_strings = 10_000_000
|
|
61
|
+
|
|
62
|
+
# Maximum total byte size of the shared strings table once decoded.
|
|
63
|
+
# Defaults to 512 MiB. Applied both to the ZIP entry's declared
|
|
64
|
+
# uncompressed size (cheap early rejection of zip bombs) and to the
|
|
65
|
+
# running sum while parsing. Set to +nil+ to disable.
|
|
66
|
+
@max_shared_string_bytes = 512 * 1024 * 1024
|
|
67
|
+
|
|
68
|
+
# Maximum uncompressed byte size accepted from a single worksheet's XML
|
|
69
|
+
# entry while iterating in +streaming: true+ mode. Default is +nil+
|
|
70
|
+
# (unbounded) because legitimate worksheets can be arbitrarily large.
|
|
71
|
+
# Set a positive integer to bound peak inflation and stop high-compression
|
|
72
|
+
# zip-bomb worksheets mid-inflate.
|
|
73
|
+
@max_worksheet_bytes = nil
|
|
74
|
+
|
|
20
75
|
class << self
|
|
21
|
-
|
|
76
|
+
# @return [Integer, nil] configured shared-strings count cap
|
|
77
|
+
attr_accessor :max_shared_strings
|
|
78
|
+
|
|
79
|
+
# @return [Integer, nil] configured shared-strings byte cap
|
|
80
|
+
attr_accessor :max_shared_string_bytes
|
|
81
|
+
|
|
82
|
+
# @return [Integer, nil] per-worksheet streaming byte cap
|
|
83
|
+
attr_accessor :max_worksheet_bytes
|
|
84
|
+
|
|
85
|
+
# Opens an existing workbook in read-only row-by-row mode.
|
|
86
|
+
#
|
|
87
|
+
# The +read_only+ keyword is required and must be +true+. It exists to
|
|
88
|
+
# mark the intent explicitly and to leave room for a future read/write
|
|
89
|
+
# mode without changing the default behavior of {.open}.
|
|
90
|
+
#
|
|
91
|
+
# With <tt>streaming: true</tt>, the native backend (when loaded) feeds
|
|
92
|
+
# worksheet XML to the parser in chunks pulled from the ZIP input stream
|
|
93
|
+
# instead of materializing the entire worksheet as one Ruby string. This
|
|
94
|
+
# keeps peak memory roughly independent of worksheet size and lets
|
|
95
|
+
# {Rbxl.max_worksheet_bytes} bound how much is inflated. Streaming mode
|
|
96
|
+
# is the same API and output shape — only the inflation strategy
|
|
97
|
+
# differs — and typically costs a few percent of throughput on small
|
|
98
|
+
# sheets in exchange for the flat memory profile.
|
|
99
|
+
#
|
|
100
|
+
# @param path [String, #to_path] filesystem path to an <tt>.xlsx</tt> file
|
|
101
|
+
# @param read_only [Boolean] must be +true+ for the current API
|
|
102
|
+
# @param streaming [Boolean] feed worksheet XML to the native parser in
|
|
103
|
+
# chunks instead of fully inflating the entry in advance. Ignored when
|
|
104
|
+
# the native extension is not loaded.
|
|
105
|
+
# @return [Rbxl::ReadOnlyWorkbook]
|
|
106
|
+
# @raise [ArgumentError] if +read_only+ is not +true+
|
|
107
|
+
def open(path, read_only: false, streaming: false)
|
|
22
108
|
raise ArgumentError, "read_only: true is required for this MVP" unless read_only
|
|
23
109
|
|
|
24
|
-
ReadOnlyWorkbook.open(path)
|
|
110
|
+
ReadOnlyWorkbook.open(path, streaming: streaming)
|
|
25
111
|
end
|
|
26
112
|
|
|
113
|
+
# Creates a new workbook in write-only mode.
|
|
114
|
+
#
|
|
115
|
+
# The +write_only+ keyword is required and must be +true+ to make the
|
|
116
|
+
# save-once, append-only contract obvious at the call site.
|
|
117
|
+
#
|
|
118
|
+
# @param write_only [Boolean] must be +true+ for the current API
|
|
119
|
+
# @return [Rbxl::WriteOnlyWorkbook]
|
|
120
|
+
# @raise [ArgumentError] if +write_only+ is not +true+
|
|
27
121
|
def new(write_only: false)
|
|
28
122
|
raise ArgumentError, "write_only: true is required for this MVP" unless write_only
|
|
29
123
|
|
data/sig/rbxl.rbs
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
module Rbxl
|
|
2
|
+
VERSION: String
|
|
3
|
+
|
|
4
|
+
type cell_value = String | Integer | Float | bool | nil
|
|
5
|
+
type pathish = String | Pathname
|
|
6
|
+
type row_input = Array[untyped] | Enumerator[untyped, untyped]
|
|
7
|
+
type row_values = Array[cell_value]
|
|
8
|
+
type row_cell = Cell | ReadOnlyCell | EmptyCell
|
|
9
|
+
type row_cells = Array[row_cell]
|
|
10
|
+
type dimensions = { ref: String, max_col: Integer, max_row: Integer }
|
|
11
|
+
|
|
12
|
+
def self.open: (pathish path, ?read_only: bool, ?streaming: bool) -> ReadOnlyWorkbook
|
|
13
|
+
def self.new: (?write_only: bool) -> WriteOnlyWorkbook
|
|
14
|
+
|
|
15
|
+
attr_accessor self.max_shared_strings: Integer?
|
|
16
|
+
attr_accessor self.max_shared_string_bytes: Integer?
|
|
17
|
+
attr_accessor self.max_worksheet_bytes: Integer?
|
|
18
|
+
|
|
19
|
+
class Error < StandardError
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
class SheetNotFoundError < Error
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
class ClosedWorkbookError < Error
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
class WorkbookAlreadySavedError < Error
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
class UnsizedWorksheetError < Error
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
class SharedStringsTooLargeError < Error
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
class WorksheetTooLargeError < Error
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
class Cell
|
|
41
|
+
attr_accessor value: cell_value
|
|
42
|
+
attr_accessor coordinate: String?
|
|
43
|
+
|
|
44
|
+
def initialize: (?value: cell_value, ?coordinate: String?) -> void
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
class ReadOnlyCell
|
|
48
|
+
attr_reader coordinate: String
|
|
49
|
+
attr_reader value: cell_value
|
|
50
|
+
|
|
51
|
+
def initialize: (String coordinate, cell_value value) -> void
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
class EmptyCell
|
|
55
|
+
attr_reader coordinate: String
|
|
56
|
+
|
|
57
|
+
def initialize: (coordinate: String) -> void
|
|
58
|
+
def value: () -> nil
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
class WriteOnlyCell
|
|
62
|
+
attr_reader value: untyped
|
|
63
|
+
attr_reader style_id: Integer?
|
|
64
|
+
|
|
65
|
+
def initialize: (untyped value, ?style_id: Integer?) -> void
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
class Row
|
|
69
|
+
attr_reader index: Integer
|
|
70
|
+
attr_reader cells: row_cells
|
|
71
|
+
|
|
72
|
+
def initialize: (index: Integer, cells: row_cells) -> void
|
|
73
|
+
def []: (Integer offset) -> row_cell?
|
|
74
|
+
def values: () -> row_values
|
|
75
|
+
def size: () -> Integer
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
class ReadOnlyWorkbook
|
|
79
|
+
MAIN_NS: String
|
|
80
|
+
REL_NS: String
|
|
81
|
+
PACKAGE_REL_NS: String
|
|
82
|
+
|
|
83
|
+
attr_reader path: String
|
|
84
|
+
attr_reader sheet_names: Array[String]
|
|
85
|
+
|
|
86
|
+
def self.open: (pathish path, ?streaming: bool) -> ReadOnlyWorkbook
|
|
87
|
+
def initialize: (pathish path, ?streaming: bool) -> void
|
|
88
|
+
def sheet: (String name) -> ReadOnlyWorksheet
|
|
89
|
+
def close: () -> void
|
|
90
|
+
def closed?: () -> bool
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
class ReadOnlyWorksheet
|
|
94
|
+
attr_reader name: String
|
|
95
|
+
attr_reader dimensions: dimensions?
|
|
96
|
+
|
|
97
|
+
def initialize: (zip: untyped, entry_path: String, shared_strings: Array[String], name: String, ?streaming: bool) -> void
|
|
98
|
+
|
|
99
|
+
def each_row: (?pad_cells: bool, ?values_only: bool, ?expand_merged: bool) { (Row | row_values) -> void } -> void
|
|
100
|
+
| (?pad_cells: bool, ?values_only: bool, ?expand_merged: bool) -> Enumerator[Row | row_values, void]
|
|
101
|
+
|
|
102
|
+
def rows: (?values_only: bool, ?pad_cells: bool, ?expand_merged: bool) -> Enumerator[Row | row_values, void]
|
|
103
|
+
|
|
104
|
+
def max_column: () -> Integer?
|
|
105
|
+
def max_row: () -> Integer?
|
|
106
|
+
def reset_dimensions: () -> nil
|
|
107
|
+
def calculate_dimension: (?force: bool) -> String
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
class WriteOnlyWorkbook
|
|
111
|
+
attr_reader worksheets: Array[WriteOnlyWorksheet]
|
|
112
|
+
|
|
113
|
+
def initialize: () -> void
|
|
114
|
+
def add_sheet: (String name) -> WriteOnlyWorksheet
|
|
115
|
+
def save: (pathish path) -> String
|
|
116
|
+
def close: () -> bool
|
|
117
|
+
def closed?: () -> bool
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
class WriteOnlyWorksheet
|
|
121
|
+
attr_reader name: String
|
|
122
|
+
|
|
123
|
+
def initialize: (name: String) -> void
|
|
124
|
+
def <<: (row_input values) -> WriteOnlyWorksheet
|
|
125
|
+
def append: (row_input values) -> WriteOnlyWorksheet
|
|
126
|
+
def to_xml: () -> String
|
|
127
|
+
end
|
|
128
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rbxl
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.0
|
|
4
|
+
version: 1.0.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Taro KOBAYASHI
|
|
@@ -43,7 +43,8 @@ dependencies:
|
|
|
43
43
|
- - "<"
|
|
44
44
|
- !ruby/object:Gem::Version
|
|
45
45
|
version: '2.0'
|
|
46
|
-
description:
|
|
46
|
+
description: rbxl is a Ruby gem for read-only row-by-row iteration and write-only
|
|
47
|
+
XLSX generation, with an optional native extension for faster XML parsing.
|
|
47
48
|
email:
|
|
48
49
|
- taro@matzlika.co.jp
|
|
49
50
|
executables: []
|
|
@@ -51,6 +52,7 @@ extensions:
|
|
|
51
52
|
- ext/rbxl_native/extconf.rb
|
|
52
53
|
extra_rdoc_files: []
|
|
53
54
|
files:
|
|
55
|
+
- CHANGELOG.md
|
|
54
56
|
- LICENSE.txt
|
|
55
57
|
- README.md
|
|
56
58
|
- Rakefile
|
|
@@ -69,6 +71,7 @@ files:
|
|
|
69
71
|
- lib/rbxl/write_only_cell.rb
|
|
70
72
|
- lib/rbxl/write_only_workbook.rb
|
|
71
73
|
- lib/rbxl/write_only_worksheet.rb
|
|
74
|
+
- sig/rbxl.rbs
|
|
72
75
|
homepage: https://github.com/matzlika/rbxl
|
|
73
76
|
licenses:
|
|
74
77
|
- MIT
|
|
@@ -93,5 +96,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
93
96
|
requirements: []
|
|
94
97
|
rubygems_version: 4.0.3
|
|
95
98
|
specification_version: 4
|
|
96
|
-
summary:
|
|
99
|
+
summary: A fast, memory-friendly Ruby gem for row-by-row XLSX reads and append-only
|
|
100
|
+
writes.
|
|
97
101
|
test_files: []
|