tabulard 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +202 -0
  3. data/README.md +43 -0
  4. data/VERSION +1 -0
  5. data/lib/sheetah/attribute.rb +60 -0
  6. data/lib/sheetah/attribute_types/composite.rb +57 -0
  7. data/lib/sheetah/attribute_types/scalar.rb +58 -0
  8. data/lib/sheetah/attribute_types/value.rb +62 -0
  9. data/lib/sheetah/attribute_types/value.rb.orig +68 -0
  10. data/lib/sheetah/attribute_types.rb +49 -0
  11. data/lib/sheetah/backends/csv.rb +92 -0
  12. data/lib/sheetah/backends/wrapper.rb +57 -0
  13. data/lib/sheetah/backends/xlsx.rb +80 -0
  14. data/lib/sheetah/backends.rb +11 -0
  15. data/lib/sheetah/column.rb +31 -0
  16. data/lib/sheetah/errors/error.rb +8 -0
  17. data/lib/sheetah/errors/spec_error.rb +10 -0
  18. data/lib/sheetah/errors/type_error.rb +10 -0
  19. data/lib/sheetah/frozen.rb +9 -0
  20. data/lib/sheetah/headers.rb +96 -0
  21. data/lib/sheetah/messaging/config.rb +19 -0
  22. data/lib/sheetah/messaging/constants.rb +17 -0
  23. data/lib/sheetah/messaging/message.rb +70 -0
  24. data/lib/sheetah/messaging/message_variant.rb +47 -0
  25. data/lib/sheetah/messaging/messages/cleaned_string.rb +18 -0
  26. data/lib/sheetah/messaging/messages/duplicated_header.rb +21 -0
  27. data/lib/sheetah/messaging/messages/invalid_header.rb +21 -0
  28. data/lib/sheetah/messaging/messages/missing_column.rb +21 -0
  29. data/lib/sheetah/messaging/messages/must_be_array.rb +18 -0
  30. data/lib/sheetah/messaging/messages/must_be_boolsy.rb +21 -0
  31. data/lib/sheetah/messaging/messages/must_be_date.rb +21 -0
  32. data/lib/sheetah/messaging/messages/must_be_email.rb +21 -0
  33. data/lib/sheetah/messaging/messages/must_be_string.rb +18 -0
  34. data/lib/sheetah/messaging/messages/must_exist.rb +18 -0
  35. data/lib/sheetah/messaging/messages/sheet_error.rb +18 -0
  36. data/lib/sheetah/messaging/messenger.rb +133 -0
  37. data/lib/sheetah/messaging/validations/base_validator.rb +43 -0
  38. data/lib/sheetah/messaging/validations/dsl.rb +31 -0
  39. data/lib/sheetah/messaging/validations/invalid_message.rb +12 -0
  40. data/lib/sheetah/messaging/validations/mixins.rb +57 -0
  41. data/lib/sheetah/messaging/validations.rb +35 -0
  42. data/lib/sheetah/messaging.rb +22 -0
  43. data/lib/sheetah/row_processor.rb +41 -0
  44. data/lib/sheetah/row_processor_result.rb +20 -0
  45. data/lib/sheetah/row_value_builder.rb +53 -0
  46. data/lib/sheetah/sheet/col_converter.rb +62 -0
  47. data/lib/sheetah/sheet.rb +107 -0
  48. data/lib/sheetah/sheet_processor.rb +61 -0
  49. data/lib/sheetah/sheet_processor_result.rb +18 -0
  50. data/lib/sheetah/specification.rb +30 -0
  51. data/lib/sheetah/template.rb +85 -0
  52. data/lib/sheetah/template_config.rb +35 -0
  53. data/lib/sheetah/types/cast.rb +20 -0
  54. data/lib/sheetah/types/cast_chain.rb +49 -0
  55. data/lib/sheetah/types/composites/array.rb +16 -0
  56. data/lib/sheetah/types/composites/array_compact.rb +13 -0
  57. data/lib/sheetah/types/composites/composite.rb +32 -0
  58. data/lib/sheetah/types/container.rb +81 -0
  59. data/lib/sheetah/types/scalars/boolsy.rb +12 -0
  60. data/lib/sheetah/types/scalars/boolsy_cast.rb +35 -0
  61. data/lib/sheetah/types/scalars/date_string.rb +12 -0
  62. data/lib/sheetah/types/scalars/date_string_cast.rb +43 -0
  63. data/lib/sheetah/types/scalars/email.rb +12 -0
  64. data/lib/sheetah/types/scalars/email_cast.rb +28 -0
  65. data/lib/sheetah/types/scalars/scalar.rb +29 -0
  66. data/lib/sheetah/types/scalars/scalar_cast.rb +49 -0
  67. data/lib/sheetah/types/scalars/string.rb +18 -0
  68. data/lib/sheetah/types/type.rb +103 -0
  69. data/lib/sheetah/utils/cell_string_cleaner.rb +29 -0
  70. data/lib/sheetah/utils/monadic_result.rb +174 -0
  71. data/lib/sheetah.rb +31 -0
  72. metadata +118 -0
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+ require_relative "../../messaging/messages/must_be_email"
5
+ require_relative "../cast"
6
+
7
+ module Sheetah
8
+ module Types
9
+ module Scalars
10
# Cast step that lets a value through only when it matches an email pattern.
#
# The pattern defaults to the standard library's `URI::MailTo::EMAIL_REGEXP`
# and can be replaced with the `email_matcher:` option.
class EmailCast
  include Cast

  EMAIL_REGEXP = ::URI::MailTo::EMAIL_REGEXP
  private_constant :EMAIL_REGEXP

  # @param email_matcher [#match?] object deciding whether a value is an
  #   email. Any other keyword option is accepted and ignored.
  def initialize(email_matcher: EMAIL_REGEXP, **)
    @email_matcher = email_matcher
  end

  # Returns `value` untouched when the matcher accepts it. Otherwise throws
  # `:failure` with a MustBeEmail message carrying the inspected value.
  #
  # @param value [Object] the value being cast
  # @param _messenger [Object] unused by this cast
  def call(value, _messenger)
    unless @email_matcher.match?(value)
      rejection = Messaging::Messages::MustBeEmail.new(code_data: { value: value.inspect })
      throw :failure, rejection
    end

    value
  end
end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../errors/type_error"
4
+ require_relative "../type"
5
+ require_relative "scalar_cast"
6
+
7
+ module Sheetah
8
+ module Types
9
+ module Scalars
10
# Base class of the scalar types: a type that casts one non-indexed value
# through its cast chain. ScalarCast is appended to the cast classes
# inherited from the parent type.
class Scalar < Type
  self.cast_classes += [ScalarCast]

  # Completes the predicate pair of the type interface. Without this
  # override the inherited implementation raises NoMethodError ("You must
  # implement this method in a subclass").
  #
  # @return [true]
  def scalar?
    true
  end

  # @return [false]
  def composite?
    false
  end

  # A scalar type has no composite behavior.
  #
  # @raise [Errors::TypeError] always
  def composite(_value, _messenger)
    raise Errors::TypeError, "A scalar type cannot act as a composite"
  end

  # Casts `value` by running it through the cast chain.
  #
  # @param index [nil] must be nil, a scalar is never indexed
  # @param value [Object] the value to cast
  # @param messenger [Object] handed over to the cast chain as is
  # @raise [Errors::TypeError] when `index` is not nil
  def scalar(index, value, messenger)
    raise Errors::TypeError, "A scalar type cannot be indexed" unless index.nil?

    cast_chain.call(value, messenger)
  end
end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../utils/cell_string_cleaner"
4
+ require_relative "../../messaging/messages/must_exist"
5
+ require_relative "../../messaging/messages/cleaned_string"
6
+ require_relative "../cast"
7
+
8
+ module Sheetah
9
+ module Types
10
+ module Scalars
11
# First cast applied to every scalar: it deals with missing values, then
# optionally strips garbage around string values.
class ScalarCast
  include Cast

  # @param nullable [Boolean] whether a nil value is acceptable
  # @param clean_string [Boolean] whether string values are cleaned
  #   Any other keyword option is accepted and ignored.
  def initialize(nullable: true, clean_string: true, **)
    @nullable = nullable
    @clean_string = clean_string
  end

  # @param value [Object] the value being cast
  # @param messenger [#warn] receives a CleanedString warning when cleaning
  #   changed the value
  # @return [Object] the value, cleaned when applicable. For a nil value
  #   nothing is returned: `:success` or `:failure` is thrown instead.
  def call(value, messenger)
    handle_nil(value)
    handle_garbage(value, messenger)
  end

  private

  # Short-circuits the cast for nil: throws `:success` with nil when the
  # cast is nullable, `:failure` with a MustExist message otherwise.
  def handle_nil(value)
    return unless value.nil?

    throw :success, nil if @nullable

    throw :failure, Messaging::Messages::MustExist.new
  end

  # Cleans string values (when enabled) and warns if that altered them.
  # Every other value is returned as is.
  def handle_garbage(value, messenger)
    cleanable = @clean_string && value.is_a?(::String)
    return value unless cleanable

    cleaned = Utils::CellStringCleaner.call(value)
    messenger.warn(Messaging::Messages::CleanedString.new) unless cleaned == value
    cleaned
  end
end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "scalar"
4
+ require_relative "../../messaging/messages/must_be_string"
5
+
6
+ module Sheetah
7
+ module Types
8
+ module Scalars
9
# Scalar type whose extra cast accepts only strings; anything else throws
# `:failure` with a MustBeString message.
String = Scalar.cast do |value, _messenger|
  throw :failure, Messaging::Messages::MustBeString.new unless value.is_a?(::String)

  # Go through #to_s so that an instance of a String subclass is turned
  # into the native, underlying string.
  value.to_s
end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "cast_chain"
4
+
5
+ module Sheetah
6
+ module Types
7
# Abstract base of all types.
#
# A type class owns an ordered list of cast classes, inherited by its
# subclasses and extended through {.cast}. Instantiating a type builds one
# cast per cast class (each receiving the type options) and appends them
# to a CastChain.
class Type
  # Enumerates the classes ObjectSpace reports as instances of the
  # receiver's singleton class.
  #
  # @return [Enumerator, nil] an enumerator without a block, nil otherwise
  def self.all(&block)
    return enum_for(:all) if block.nil?

    ObjectSpace.each_object(singleton_class, &block)
    nil
  end

  # @return [Array] the receiver's own cast classes when it has some,
  #   those of its superclass otherwise
  def self.cast_classes
    return @cast_classes if defined?(@cast_classes)

    superclass.cast_classes
  end

  class << self
    attr_writer :cast_classes
  end

  # Derives a new type from the receiver, with one more cast at the end of
  # its cast classes.
  #
  # @param cast_class [Class, nil] a cast class
  # @param cast_block [Proc, nil] a block used directly as the cast
  # @return [Class] the new subclass of the receiver
  # @raise [ArgumentError] unless exactly one of class and block is given
  def self.cast(cast_class = nil, &cast_block)
    raise ArgumentError, "Expected either a Class or a block, got both" if cast_class && cast_block
    raise ArgumentError, "Expected either a Class or a block, got none" unless cast_class || cast_block

    Class.new(self).tap do |type|
      type.cast_classes += [cast_class || SimpleCast.new(cast_block)]
    end
  end

  # Freezes the class along with its cast classes. Inherited cast classes
  # are copied first, so that the ancestor's list is left unfrozen.
  def self.freeze
    @cast_classes = cast_classes.dup unless defined?(@cast_classes)
    @cast_classes.freeze
    super
  end

  # Same as `new`, but the instance is returned frozen.
  def self.new!(...)
    new(...).freeze
  end

  self.cast_classes = []

  # @param opts [Hash] options forwarded to every cast class
  def initialize(**opts)
    @cast_chain = CastChain.new

    self.class.cast_classes.each { |klass| @cast_chain.append(klass.new(**opts)) }
  end

  # @private
  attr_reader :cast_chain

  # Forwards all arguments to the cast chain.
  def cast(...)
    @cast_chain.call(...)
  end

  # @abstract
  # @raise [NoMethodError] unless overridden
  def scalar?
    raise NoMethodError, "You must implement this method in a subclass"
  end

  # @abstract
  # @raise [NoMethodError] unless overridden
  def composite?
    raise NoMethodError, "You must implement this method in a subclass"
  end

  # @abstract
  # @raise [NoMethodError] unless overridden
  def scalar(_index, _value, _messenger)
    raise NoMethodError, "You must implement this method in a subclass"
  end

  # @abstract
  # @raise [NoMethodError] unless overridden
  def composite(_value, _messenger)
    raise NoMethodError, "You must implement this method in a subclass"
  end

  # Freezes the instance along with its cast chain.
  def freeze
    @cast_chain.freeze
    super
  end

  # Lets a block stand where a cast class is expected: `new` ignores its
  # options and hands back the wrapped block.
  #
  # @private
  class SimpleCast
    def initialize(cast)
      @cast = cast
    end

    def new(**)
      @cast
    end

    # Two simple casts are equal when they wrap equal casts.
    def ==(other)
      return false unless other.is_a?(self.class)

      other.cast == cast
    end

    protected

    attr_reader :cast
  end
end
102
+ end
103
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Sheetah
4
+ module Utils
5
# Removes the run of non-printable and/or whitespace characters found at
# the very start and at the very end of a string. The inside of the string
# is left alone, and so is the string given by the caller: a copy is cleaned.
class CellStringCleaner
  junk_run = "(?:[^[:print:]]|[[:space:]])+"
  GARBAGE_PREFIX = /\A#{junk_run}/
  GARBAGE_SUFFIX = /#{junk_run}\Z/
  private_constant :GARBAGE_PREFIX, :GARBAGE_SUFFIX

  # Shortcut delegating to a shared, frozen cleaner instance.
  def self.call(...)
    DEFAULT.call(...)
  end

  # @param value [String] the string to clean (not mutated)
  # @return [String] a cleaned copy of `value`
  def call(value)
    # TODO: benchmarks
    value.dup.tap do |cleaned|
      cleaned.sub!(GARBAGE_PREFIX, "")
      cleaned.sub!(GARBAGE_SUFFIX, "")
    end
  end

  DEFAULT = new.freeze
  private_constant :DEFAULT
end
28
+ end
29
+ end
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Sheetah
4
+ module Utils
5
# Minimal Success/Failure result objects, with constructor helpers
# ({#Success}, {#Failure}) and a {#Do} block in which unwrapping a
# {Failure} stops the block early.
module MonadicResult
  # {Unit} is a singleton, and is used when there is no other meaningful
  # value that could be returned.
  #
  # It allows the {Result} implementation to distinguish between *a null
  # value* (i.e. `nil`) and *the lack of a value*, to provide adequate
  # behavior in each case.
  #
  # The {Result} API should not expose {Unit} directly to its consumers.
  #
  # @see https://en.wikipedia.org/wiki/Unit_type
  Unit = Object.new

  def Unit.to_s
    "Unit"
  end

  def Unit.inspect
    "Unit"
  end

  Unit.freeze

  # Tag thrown by {Failure#unwrap} and caught by {#Do}.
  DO_TOKEN = :MonadicResultDo
  private_constant :DO_TOKEN

  # Behavior shared by {Success} and {Failure}.
  module Result
    UnwrapError = Class.new(StandardError)
    VariantError = Class.new(UnwrapError)
    ValueError = Class.new(UnwrapError)

    # @param value [Object] the wrapped value; leave it out to build an
    #   empty result
    def initialize(value = Unit)
      @wrapped = value
    end

    # Whether the result was built without a value.
    #
    # The sentinel is checked by identity. Comparing with the wrapped
    # value's own `==` would let that value decide: a permissive `==` would
    # make a result holding a value look empty, and a `==` that does not
    # support foreign objects would raise.
    #
    # @return [Boolean]
    def empty?
      Unit.equal?(wrapped)
    end

    # Two results are equal when `other` is an instance of the receiver's
    # class and both wrap equal values.
    def ==(other)
      other.is_a?(self.class) && other.wrapped == wrapped
    end

    # @return [String] e.g. `Success()` or `Failure(:reason)`
    def inspect
      if empty?
        "#{variant}()"
      else
        "#{variant}(#{wrapped.inspect})"
      end
    end

    alias to_s inspect

    # @return [Result] an empty result of the same variant (the receiver
    #   itself when it is already empty)
    def discard
      empty? ? self : self.class.new
    end

    protected

    attr_reader :wrapped

    private

    # @raise [ValueError] when the result is empty
    def value
      raise ValueError, "There is no value within the result" if empty?

      wrapped
    end

    # @return [Object, nil] the wrapped value, or nil when the result is
    #   empty
    def value?
      wrapped unless empty?
    end

    # Yields the wrapped value, or nothing at all when the result is
    # empty, and returns what the block returns.
    def open
      if empty?
        yield
      else
        yield wrapped
      end
    end
  end

  # The successful variant of a result.
  class Success
    include Result

    def success?
      true
    end

    def failure?
      false
    end

    # @return [Object] the wrapped value
    # @raise [ValueError] when the result is empty
    def success
      value
    end

    # @raise [VariantError] always
    def failure
      raise VariantError, "Not a Failure"
    end

    # @return [Object, nil] the wrapped value, or nil when the result is
    #   empty
    def unwrap
      value?
    end

    # On a Success, `bind` yields the wrapped value (if any) and returns
    # the block's result.
    alias bind open
    public :bind

    # On a Success, `or` returns the receiver and ignores any block.
    alias or itself

    private

    def variant
      "Success"
    end
  end

  # The failed variant of a result.
  class Failure
    include Result

    def success?
      false
    end

    def failure?
      true
    end

    # @raise [VariantError] always
    def success
      raise VariantError, "Not a Success"
    end

    # @return [Object] the wrapped value
    # @raise [ValueError] when the result is empty
    def failure
      value
    end

    # Throws the receiver, to be caught by the enclosing {#Do} block.
    def unwrap
      throw DO_TOKEN, self
    end

    # On a Failure, `bind` returns the receiver and ignores any block.
    alias bind itself

    # On a Failure, `or` yields the wrapped value (if any) and returns the
    # block's result.
    alias or open
    public :or

    private

    def variant
      "Failure"
    end
  end

  # rubocop:disable Naming/MethodName

  # Builds a {Success}; arguments are forwarded to its constructor.
  def Success(...)
    Success.new(...)
  end

  # Builds a {Failure}; arguments are forwarded to its constructor.
  def Failure(...)
    Failure.new(...)
  end

  # Runs the block and returns its value, unless a {Failure} is unwrapped
  # along the way: that failure is then returned instead.
  def Do(&block)
    catch(DO_TOKEN, &block)
  end

  # rubocop:enable Naming/MethodName
end
173
+ end
174
+ end
data/lib/sheetah.rb ADDED
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # {Sheetah} is a library designed to process tabular data according to a
4
+ # {Sheetah::Template developer-defined structure}. It will turn each row into an
5
+ # object whose keys and types are specified by the structure.
6
+ #
7
+ # It can work with tabular data presented in different formats by delegating
8
+ # the parsing of documents to specialized backends
9
+ # ({Sheetah::Backends::Xlsx}, {Sheetah::Backends::Csv}, etc...).
10
+ #
11
+ # Given a tabular document and a specification of the document structure,
12
+ # Sheetah may process the document by handling the following tasks:
13
+ #
14
+ # - validation of the document's actual structure
15
+ # - arbitrarily complex typecasting of each row into a validated object,
16
+ # according to the document specification
17
+ # - fine-grained error handling (at the sheet/row/col/cell level)
18
+ # - all of the above done so that internationalization of messages is easy
19
+ #
20
+ # Sheetah is designed with memory efficiency in mind by processing documents
21
+ # one row at a time, thus not requiring parsing and loading the whole document
22
+ # in memory upfront (depending on the backend). The memory consumption of the
23
+ # library should therefore theoretically stay stable during the processing of a
24
+ # document, regardless of how many rows it may have.
25
+ module Sheetah
26
+ end
27
+
28
+ require "sheetah/template"
29
+ require "sheetah/template_config"
30
+ require "sheetah/sheet_processor"
31
+ require "sheetah/backends/wrapper"
metadata ADDED
@@ -0,0 +1,118 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tabulard
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Erwan Thomas
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-06-05 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - id@maen.fr
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - LICENSE
21
+ - README.md
22
+ - VERSION
23
+ - lib/sheetah.rb
24
+ - lib/sheetah/attribute.rb
25
+ - lib/sheetah/attribute_types.rb
26
+ - lib/sheetah/attribute_types/composite.rb
27
+ - lib/sheetah/attribute_types/scalar.rb
28
+ - lib/sheetah/attribute_types/value.rb
29
+ - lib/sheetah/attribute_types/value.rb.orig
30
+ - lib/sheetah/backends.rb
31
+ - lib/sheetah/backends/csv.rb
32
+ - lib/sheetah/backends/wrapper.rb
33
+ - lib/sheetah/backends/xlsx.rb
34
+ - lib/sheetah/column.rb
35
+ - lib/sheetah/errors/error.rb
36
+ - lib/sheetah/errors/spec_error.rb
37
+ - lib/sheetah/errors/type_error.rb
38
+ - lib/sheetah/frozen.rb
39
+ - lib/sheetah/headers.rb
40
+ - lib/sheetah/messaging.rb
41
+ - lib/sheetah/messaging/config.rb
42
+ - lib/sheetah/messaging/constants.rb
43
+ - lib/sheetah/messaging/message.rb
44
+ - lib/sheetah/messaging/message_variant.rb
45
+ - lib/sheetah/messaging/messages/cleaned_string.rb
46
+ - lib/sheetah/messaging/messages/duplicated_header.rb
47
+ - lib/sheetah/messaging/messages/invalid_header.rb
48
+ - lib/sheetah/messaging/messages/missing_column.rb
49
+ - lib/sheetah/messaging/messages/must_be_array.rb
50
+ - lib/sheetah/messaging/messages/must_be_boolsy.rb
51
+ - lib/sheetah/messaging/messages/must_be_date.rb
52
+ - lib/sheetah/messaging/messages/must_be_email.rb
53
+ - lib/sheetah/messaging/messages/must_be_string.rb
54
+ - lib/sheetah/messaging/messages/must_exist.rb
55
+ - lib/sheetah/messaging/messages/sheet_error.rb
56
+ - lib/sheetah/messaging/messenger.rb
57
+ - lib/sheetah/messaging/validations.rb
58
+ - lib/sheetah/messaging/validations/base_validator.rb
59
+ - lib/sheetah/messaging/validations/dsl.rb
60
+ - lib/sheetah/messaging/validations/invalid_message.rb
61
+ - lib/sheetah/messaging/validations/mixins.rb
62
+ - lib/sheetah/row_processor.rb
63
+ - lib/sheetah/row_processor_result.rb
64
+ - lib/sheetah/row_value_builder.rb
65
+ - lib/sheetah/sheet.rb
66
+ - lib/sheetah/sheet/col_converter.rb
67
+ - lib/sheetah/sheet_processor.rb
68
+ - lib/sheetah/sheet_processor_result.rb
69
+ - lib/sheetah/specification.rb
70
+ - lib/sheetah/template.rb
71
+ - lib/sheetah/template_config.rb
72
+ - lib/sheetah/types/cast.rb
73
+ - lib/sheetah/types/cast_chain.rb
74
+ - lib/sheetah/types/composites/array.rb
75
+ - lib/sheetah/types/composites/array_compact.rb
76
+ - lib/sheetah/types/composites/composite.rb
77
+ - lib/sheetah/types/container.rb
78
+ - lib/sheetah/types/scalars/boolsy.rb
79
+ - lib/sheetah/types/scalars/boolsy_cast.rb
80
+ - lib/sheetah/types/scalars/date_string.rb
81
+ - lib/sheetah/types/scalars/date_string_cast.rb
82
+ - lib/sheetah/types/scalars/email.rb
83
+ - lib/sheetah/types/scalars/email_cast.rb
84
+ - lib/sheetah/types/scalars/scalar.rb
85
+ - lib/sheetah/types/scalars/scalar_cast.rb
86
+ - lib/sheetah/types/scalars/string.rb
87
+ - lib/sheetah/types/type.rb
88
+ - lib/sheetah/utils/cell_string_cleaner.rb
89
+ - lib/sheetah/utils/monadic_result.rb
90
+ homepage: https://github.com/tabulard/tabulard
91
+ licenses:
92
+ - Apache-2.0
93
+ metadata:
94
+ source_code_uri: https://github.com/tabulard/tabulard
95
+ bug_tracker_uri: https://github.com/tabulard/tabulard/issues
96
+ changelog_uri: https://github.com/tabulard/tabulard/blob/master/CHANGELOG.md
97
+ documentation_uri: https://github.com/tabulard/tabulard/blob/master/README.md
98
+ rubygems_mfa_required: 'true'
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: '3.0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ requirements: []
114
+ rubygems_version: 3.5.11
115
+ signing_key:
116
+ specification_version: 4
117
+ summary: A highly-customizable tabular data processor
118
+ test_files: []