rbxl 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rbxl.rb CHANGED
@@ -16,14 +16,108 @@ require_relative "rbxl/write_only_cell"
16
16
  require_relative "rbxl/write_only_workbook"
17
17
  require_relative "rbxl/write_only_worksheet"
18
18
 
19
+ # Minimal, memory-friendly XLSX reader/writer inspired by +openpyxl+.
20
+ #
21
+ # Rbxl exposes two explicit, non-overlapping modes:
22
+ #
23
+ # * {Rbxl.open} returns a {Rbxl::ReadOnlyWorkbook} for row-by-row reads
24
+ # * {Rbxl.new} returns a {Rbxl::WriteOnlyWorkbook} for one-shot writes
25
+ #
26
+ # The API is intentionally narrow so that memory usage stays predictable
27
+ # for large workbooks. Neither mode materializes the full workbook in
28
+ # memory; reads pull rows from the underlying XML one at a time, and writes
29
+ # accumulate only the rows added before {Rbxl::WriteOnlyWorkbook#save}.
30
+ #
31
+ # == Reading
32
+ #
33
+ # require "rbxl"
34
+ #
35
+ # book = Rbxl.open("report.xlsx", read_only: true)
36
+ # sheet = book.sheet("Report")
37
+ # sheet.each_row(values_only: true) { |values| p values }
38
+ # book.close
39
+ #
40
+ # == Writing
41
+ #
42
+ # require "rbxl"
43
+ #
44
+ # book = Rbxl.new(write_only: true)
45
+ # sheet = book.add_sheet("Report")
46
+ # sheet << ["id", "name", "score"]
47
+ # sheet << [1, "alice", 100]
48
+ # book.save("report.xlsx")
49
+ #
50
+ # == Native extension
51
+ #
52
+ # Requiring <tt>"rbxl/native"</tt> after <tt>"rbxl"</tt> swaps the hot
53
+ # worksheet XML paths for a libxml2-backed C implementation with the same
54
+ # observable behavior. See the README for build requirements.
19
55
  module Rbxl
56
+ # Maximum number of shared strings accepted from a workbook's
57
+ # +xl/sharedStrings.xml+ entry. Defaults to 10 million, which comfortably
58
+ # covers real-world enterprise workbooks while rejecting files crafted to
59
+ # exhaust memory before any row is read. Set to +nil+ to disable.
60
+ @max_shared_strings = 10_000_000
61
+
62
+ # Maximum total byte size of the shared strings table once decoded.
63
+ # Defaults to 512 MiB. Applied both to the ZIP entry's declared
64
+ # uncompressed size (cheap early rejection of zip bombs) and to the
65
+ # running sum while parsing. Set to +nil+ to disable.
66
+ @max_shared_string_bytes = 512 * 1024 * 1024
67
+
68
+ # Maximum uncompressed byte size accepted from a single worksheet's XML
69
+ # entry while iterating in +streaming: true+ mode. Default is +nil+
70
+ # (unbounded) because legitimate worksheets can be arbitrarily large.
71
+ # Set a positive integer to bound peak inflation and stop high-compression
72
+ # zip-bomb worksheets mid-inflate.
73
+ @max_worksheet_bytes = nil
74
+
20
75
  class << self
21
- def open(path, read_only: false)
76
+ # @return [Integer, nil] configured shared-strings count cap
77
+ attr_accessor :max_shared_strings
78
+
79
+ # @return [Integer, nil] configured shared-strings byte cap
80
+ attr_accessor :max_shared_string_bytes
81
+
82
+ # @return [Integer, nil] per-worksheet streaming byte cap
83
+ attr_accessor :max_worksheet_bytes
84
+
85
+ # Opens an existing workbook in read-only row-by-row mode.
86
+ #
87
+ # The +read_only+ keyword is required and must be +true+. It exists to
88
+ # mark the intent explicitly and to leave room for a future read/write
89
+ # mode without changing the default behavior of {.open}.
90
+ #
91
+ # With <tt>streaming: true</tt>, the native backend (when loaded) feeds
92
+ # worksheet XML to the parser in chunks pulled from the ZIP input stream
93
+ # instead of materializing the entire worksheet as one Ruby string. This
94
+ # keeps peak memory roughly independent of worksheet size and lets
95
+ # {Rbxl.max_worksheet_bytes} bound how much is inflated. Streaming mode
96
+ # has the same API and output shape — only the inflation strategy
97
+ # differs — and typically gives up a few percent of throughput on small
98
+ # sheets in exchange for the flat memory profile.
99
+ #
100
+ # @param path [String, #to_path] filesystem path to an <tt>.xlsx</tt> file
101
+ # @param read_only [Boolean] must be +true+ for the current API
102
+ # @param streaming [Boolean] feed worksheet XML to the native parser in
103
+ # chunks instead of fully inflating the entry in advance. Ignored when
104
+ # the native extension is not loaded.
105
+ # @return [Rbxl::ReadOnlyWorkbook]
106
+ # @raise [ArgumentError] if +read_only+ is not +true+
107
+ def open(path, read_only: false, streaming: false)
22
108
  raise ArgumentError, "read_only: true is required for this MVP" unless read_only
23
109
 
24
- ReadOnlyWorkbook.open(path)
110
+ ReadOnlyWorkbook.open(path, streaming: streaming)
25
111
  end
26
112
 
113
+ # Creates a new workbook in write-only mode.
114
+ #
115
+ # The +write_only+ keyword is required and must be +true+ to make the
116
+ # save-once, append-only contract obvious at the call site.
117
+ #
118
+ # @param write_only [Boolean] must be +true+ for the current API
119
+ # @return [Rbxl::WriteOnlyWorkbook]
120
+ # @raise [ArgumentError] if +write_only+ is not +true+
27
121
  def new(write_only: false)
28
122
  raise ArgumentError, "write_only: true is required for this MVP" unless write_only
29
123
 
data/sig/rbxl.rbs ADDED
@@ -0,0 +1,128 @@
1
+ module Rbxl
2
+ VERSION: String
3
+
4
+ type cell_value = String | Integer | Float | bool | nil
5
+ type pathish = String | Pathname
6
+ type row_input = Array[untyped] | Enumerator[untyped, untyped]
7
+ type row_values = Array[cell_value]
8
+ type row_cell = Cell | ReadOnlyCell | EmptyCell
9
+ type row_cells = Array[row_cell]
10
+ type dimensions = { ref: String, max_col: Integer, max_row: Integer }
11
+
12
+ def self.open: (pathish path, ?read_only: bool, ?streaming: bool) -> ReadOnlyWorkbook
13
+ def self.new: (?write_only: bool) -> WriteOnlyWorkbook
14
+
15
+ attr_accessor self.max_shared_strings: Integer?
16
+ attr_accessor self.max_shared_string_bytes: Integer?
17
+ attr_accessor self.max_worksheet_bytes: Integer?
18
+
19
+ class Error < StandardError
20
+ end
21
+
22
+ class SheetNotFoundError < Error
23
+ end
24
+
25
+ class ClosedWorkbookError < Error
26
+ end
27
+
28
+ class WorkbookAlreadySavedError < Error
29
+ end
30
+
31
+ class UnsizedWorksheetError < Error
32
+ end
33
+
34
+ class SharedStringsTooLargeError < Error
35
+ end
36
+
37
+ class WorksheetTooLargeError < Error
38
+ end
39
+
40
+ class Cell
41
+ attr_accessor value: cell_value
42
+ attr_accessor coordinate: String?
43
+
44
+ def initialize: (?value: cell_value, ?coordinate: String?) -> void
45
+ end
46
+
47
+ class ReadOnlyCell
48
+ attr_reader coordinate: String
49
+ attr_reader value: cell_value
50
+
51
+ def initialize: (String coordinate, cell_value value) -> void
52
+ end
53
+
54
+ class EmptyCell
55
+ attr_reader coordinate: String
56
+
57
+ def initialize: (coordinate: String) -> void
58
+ def value: () -> nil
59
+ end
60
+
61
+ class WriteOnlyCell
62
+ attr_reader value: untyped
63
+ attr_reader style_id: Integer?
64
+
65
+ def initialize: (untyped value, ?style_id: Integer?) -> void
66
+ end
67
+
68
+ class Row
69
+ attr_reader index: Integer
70
+ attr_reader cells: row_cells
71
+
72
+ def initialize: (index: Integer, cells: row_cells) -> void
73
+ def []: (Integer offset) -> row_cell?
74
+ def values: () -> row_values
75
+ def size: () -> Integer
76
+ end
77
+
78
+ class ReadOnlyWorkbook
79
+ MAIN_NS: String
80
+ REL_NS: String
81
+ PACKAGE_REL_NS: String
82
+
83
+ attr_reader path: String
84
+ attr_reader sheet_names: Array[String]
85
+
86
+ def self.open: (pathish path, ?streaming: bool) -> ReadOnlyWorkbook
87
+ def initialize: (pathish path, ?streaming: bool) -> void
88
+ def sheet: (String name) -> ReadOnlyWorksheet
89
+ def close: () -> void
90
+ def closed?: () -> bool
91
+ end
92
+
93
+ class ReadOnlyWorksheet
94
+ attr_reader name: String
95
+ attr_reader dimensions: dimensions?
96
+
97
+ def initialize: (zip: untyped, entry_path: String, shared_strings: Array[String], name: String, ?streaming: bool) -> void
98
+
99
+ def each_row: (?pad_cells: bool, ?values_only: bool, ?expand_merged: bool) { (Row | row_values) -> void } -> void
100
+ | (?pad_cells: bool, ?values_only: bool, ?expand_merged: bool) -> Enumerator[Row | row_values, void]
101
+
102
+ def rows: (?values_only: bool, ?pad_cells: bool, ?expand_merged: bool) -> Enumerator[Row | row_values, void]
103
+
104
+ def max_column: () -> Integer?
105
+ def max_row: () -> Integer?
106
+ def reset_dimensions: () -> nil
107
+ def calculate_dimension: (?force: bool) -> String
108
+ end
109
+
110
+ class WriteOnlyWorkbook
111
+ attr_reader worksheets: Array[WriteOnlyWorksheet]
112
+
113
+ def initialize: () -> void
114
+ def add_sheet: (String name) -> WriteOnlyWorksheet
115
+ def save: (pathish path) -> String
116
+ def close: () -> bool
117
+ def closed?: () -> bool
118
+ end
119
+
120
+ class WriteOnlyWorksheet
121
+ attr_reader name: String
122
+
123
+ def initialize: (name: String) -> void
124
+ def <<: (row_input values) -> WriteOnlyWorksheet
125
+ def append: (row_input values) -> WriteOnlyWorksheet
126
+ def to_xml: () -> String
127
+ end
128
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbxl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Taro KOBAYASHI
@@ -43,7 +43,8 @@ dependencies:
43
43
  - - "<"
44
44
  - !ruby/object:Gem::Version
45
45
  version: '2.0'
46
- description: A small Ruby gem for read-only and write-only xlsx workflows.
46
+ description: rbxl is a Ruby gem for read-only row-by-row iteration and write-only
47
+ XLSX generation, with an optional native extension for faster XML parsing.
47
48
  email:
48
49
  - taro@matzlika.co.jp
49
50
  executables: []
@@ -51,6 +52,7 @@ extensions:
51
52
  - ext/rbxl_native/extconf.rb
52
53
  extra_rdoc_files: []
53
54
  files:
55
+ - CHANGELOG.md
54
56
  - LICENSE.txt
55
57
  - README.md
56
58
  - Rakefile
@@ -69,6 +71,7 @@ files:
69
71
  - lib/rbxl/write_only_cell.rb
70
72
  - lib/rbxl/write_only_workbook.rb
71
73
  - lib/rbxl/write_only_worksheet.rb
74
+ - sig/rbxl.rbs
72
75
  homepage: https://github.com/matzlika/rbxl
73
76
  licenses:
74
77
  - MIT
@@ -93,5 +96,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
96
  requirements: []
94
97
  rubygems_version: 4.0.3
95
98
  specification_version: 4
96
- summary: Streaming xlsx reader/writer inspired by openpyxl
99
+ summary: A fast, memory-friendly Ruby gem for row-by-row XLSX reads and append-only
100
+ writes.
97
101
  test_files: []