tabulard 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +202 -0
  3. data/README.md +43 -0
  4. data/VERSION +1 -0
  5. data/lib/sheetah/attribute.rb +60 -0
  6. data/lib/sheetah/attribute_types/composite.rb +57 -0
  7. data/lib/sheetah/attribute_types/scalar.rb +58 -0
  8. data/lib/sheetah/attribute_types/value.rb +62 -0
  9. data/lib/sheetah/attribute_types/value.rb.orig +68 -0
  10. data/lib/sheetah/attribute_types.rb +49 -0
  11. data/lib/sheetah/backends/csv.rb +92 -0
  12. data/lib/sheetah/backends/wrapper.rb +57 -0
  13. data/lib/sheetah/backends/xlsx.rb +80 -0
  14. data/lib/sheetah/backends.rb +11 -0
  15. data/lib/sheetah/column.rb +31 -0
  16. data/lib/sheetah/errors/error.rb +8 -0
  17. data/lib/sheetah/errors/spec_error.rb +10 -0
  18. data/lib/sheetah/errors/type_error.rb +10 -0
  19. data/lib/sheetah/frozen.rb +9 -0
  20. data/lib/sheetah/headers.rb +96 -0
  21. data/lib/sheetah/messaging/config.rb +19 -0
  22. data/lib/sheetah/messaging/constants.rb +17 -0
  23. data/lib/sheetah/messaging/message.rb +70 -0
  24. data/lib/sheetah/messaging/message_variant.rb +47 -0
  25. data/lib/sheetah/messaging/messages/cleaned_string.rb +18 -0
  26. data/lib/sheetah/messaging/messages/duplicated_header.rb +21 -0
  27. data/lib/sheetah/messaging/messages/invalid_header.rb +21 -0
  28. data/lib/sheetah/messaging/messages/missing_column.rb +21 -0
  29. data/lib/sheetah/messaging/messages/must_be_array.rb +18 -0
  30. data/lib/sheetah/messaging/messages/must_be_boolsy.rb +21 -0
  31. data/lib/sheetah/messaging/messages/must_be_date.rb +21 -0
  32. data/lib/sheetah/messaging/messages/must_be_email.rb +21 -0
  33. data/lib/sheetah/messaging/messages/must_be_string.rb +18 -0
  34. data/lib/sheetah/messaging/messages/must_exist.rb +18 -0
  35. data/lib/sheetah/messaging/messages/sheet_error.rb +18 -0
  36. data/lib/sheetah/messaging/messenger.rb +133 -0
  37. data/lib/sheetah/messaging/validations/base_validator.rb +43 -0
  38. data/lib/sheetah/messaging/validations/dsl.rb +31 -0
  39. data/lib/sheetah/messaging/validations/invalid_message.rb +12 -0
  40. data/lib/sheetah/messaging/validations/mixins.rb +57 -0
  41. data/lib/sheetah/messaging/validations.rb +35 -0
  42. data/lib/sheetah/messaging.rb +22 -0
  43. data/lib/sheetah/row_processor.rb +41 -0
  44. data/lib/sheetah/row_processor_result.rb +20 -0
  45. data/lib/sheetah/row_value_builder.rb +53 -0
  46. data/lib/sheetah/sheet/col_converter.rb +62 -0
  47. data/lib/sheetah/sheet.rb +107 -0
  48. data/lib/sheetah/sheet_processor.rb +61 -0
  49. data/lib/sheetah/sheet_processor_result.rb +18 -0
  50. data/lib/sheetah/specification.rb +30 -0
  51. data/lib/sheetah/template.rb +85 -0
  52. data/lib/sheetah/template_config.rb +35 -0
  53. data/lib/sheetah/types/cast.rb +20 -0
  54. data/lib/sheetah/types/cast_chain.rb +49 -0
  55. data/lib/sheetah/types/composites/array.rb +16 -0
  56. data/lib/sheetah/types/composites/array_compact.rb +13 -0
  57. data/lib/sheetah/types/composites/composite.rb +32 -0
  58. data/lib/sheetah/types/container.rb +81 -0
  59. data/lib/sheetah/types/scalars/boolsy.rb +12 -0
  60. data/lib/sheetah/types/scalars/boolsy_cast.rb +35 -0
  61. data/lib/sheetah/types/scalars/date_string.rb +12 -0
  62. data/lib/sheetah/types/scalars/date_string_cast.rb +43 -0
  63. data/lib/sheetah/types/scalars/email.rb +12 -0
  64. data/lib/sheetah/types/scalars/email_cast.rb +28 -0
  65. data/lib/sheetah/types/scalars/scalar.rb +29 -0
  66. data/lib/sheetah/types/scalars/scalar_cast.rb +49 -0
  67. data/lib/sheetah/types/scalars/string.rb +18 -0
  68. data/lib/sheetah/types/type.rb +103 -0
  69. data/lib/sheetah/utils/cell_string_cleaner.rb +29 -0
  70. data/lib/sheetah/utils/monadic_result.rb +174 -0
  71. data/lib/sheetah.rb +31 -0
  72. metadata +118 -0
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+ require_relative "../../messaging/messages/must_be_email"
5
+ require_relative "../cast"
6
+
7
+ module Sheetah
8
+ module Types
9
+ module Scalars
10
+ class EmailCast
11
+ include Cast
12
+
13
+ EMAIL_REGEXP = ::URI::MailTo::EMAIL_REGEXP
14
+ private_constant :EMAIL_REGEXP
15
+
16
+ def initialize(email_matcher: EMAIL_REGEXP, **)
17
+ @email_matcher = email_matcher
18
+ end
19
+
20
+ def call(value, _messenger)
21
+ return value if @email_matcher.match?(value)
22
+
23
+ throw :failure, Messaging::Messages::MustBeEmail.new(code_data: { value: value.inspect })
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../errors/type_error"
4
+ require_relative "../type"
5
+ require_relative "scalar_cast"
6
+
7
+ module Sheetah
8
+ module Types
9
+ module Scalars
10
+ class Scalar < Type
11
+ self.cast_classes += [ScalarCast]
12
+
13
+ def composite?
14
+ false
15
+ end
16
+
17
+ def composite(_value, _messenger)
18
+ raise Errors::TypeError, "A scalar type cannot act as a composite"
19
+ end
20
+
21
+ def scalar(index, value, messenger)
22
+ raise Errors::TypeError, "A scalar type cannot be indexed" unless index.nil?
23
+
24
+ cast_chain.call(value, messenger)
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../utils/cell_string_cleaner"
4
+ require_relative "../../messaging/messages/must_exist"
5
+ require_relative "../../messaging/messages/cleaned_string"
6
+ require_relative "../cast"
7
+
8
+ module Sheetah
9
+ module Types
10
+ module Scalars
11
+ class ScalarCast
12
+ include Cast
13
+
14
+ def initialize(nullable: true, clean_string: true, **)
15
+ @nullable = nullable
16
+ @clean_string = clean_string
17
+ end
18
+
19
+ def call(value, messenger)
20
+ handle_nil(value)
21
+
22
+ handle_garbage(value, messenger)
23
+ end
24
+
25
+ private
26
+
27
+ def handle_nil(value)
28
+ return unless value.nil?
29
+
30
+ if @nullable
31
+ throw :success, nil
32
+ else
33
+ throw :failure, Messaging::Messages::MustExist.new
34
+ end
35
+ end
36
+
37
+ def handle_garbage(value, messenger)
38
+ return value unless @clean_string && value.is_a?(::String)
39
+
40
+ clean_string = Utils::CellStringCleaner.call(value)
41
+
42
+ messenger.warn(Messaging::Messages::CleanedString.new) if clean_string != value
43
+
44
+ clean_string
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "scalar"
4
+ require_relative "../../messaging/messages/must_be_string"
5
+
6
+ module Sheetah
7
+ module Types
8
+ module Scalars
9
+ String = Scalar.cast do |value, _messenger|
10
+ # value.to_s, because we want the native, underlying string when value
11
+ # is an instance of a String subclass
12
+ next value.to_s if value.is_a?(::String)
13
+
14
+ throw :failure, Messaging::Messages::MustBeString.new
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "cast_chain"
4
+
5
+ module Sheetah
6
+ module Types
7
+ class Type
8
+ class << self
9
+ def all(&block)
10
+ return enum_for(:all) unless block
11
+
12
+ ObjectSpace.each_object(singleton_class, &block)
13
+ nil
14
+ end
15
+
16
+ def cast_classes
17
+ defined?(@cast_classes) ? @cast_classes : superclass.cast_classes
18
+ end
19
+
20
+ attr_writer :cast_classes
21
+
22
+ def cast(cast_class = nil, &cast_block)
23
+ if cast_class && cast_block
24
+ raise ArgumentError, "Expected either a Class or a block, got both"
25
+ elsif !(cast_class || cast_block)
26
+ raise ArgumentError, "Expected either a Class or a block, got none"
27
+ end
28
+
29
+ type = Class.new(self)
30
+ type.cast_classes += [cast_class || SimpleCast.new(cast_block)]
31
+ type
32
+ end
33
+
34
+ def freeze
35
+ @cast_classes = cast_classes.dup unless defined?(@cast_classes)
36
+ @cast_classes.freeze
37
+ super
38
+ end
39
+
40
+ def new!(...)
41
+ new(...).freeze
42
+ end
43
+ end
44
+
45
+ self.cast_classes = []
46
+
47
+ def initialize(**opts)
48
+ @cast_chain = CastChain.new
49
+
50
+ self.class.cast_classes.each do |cast_class|
51
+ @cast_chain.append(cast_class.new(**opts))
52
+ end
53
+ end
54
+
55
+ # @private
56
+ attr_reader :cast_chain
57
+
58
+ def cast(...)
59
+ @cast_chain.call(...)
60
+ end
61
+
62
+ def scalar?
63
+ raise NoMethodError, "You must implement this method in a subclass"
64
+ end
65
+
66
+ def composite?
67
+ raise NoMethodError, "You must implement this method in a subclass"
68
+ end
69
+
70
+ def scalar(_index, _value, _messenger)
71
+ raise NoMethodError, "You must implement this method in a subclass"
72
+ end
73
+
74
+ def composite(_value, _messenger)
75
+ raise NoMethodError, "You must implement this method in a subclass"
76
+ end
77
+
78
+ def freeze
79
+ @cast_chain.freeze
80
+ super
81
+ end
82
+
83
+ # @private
84
+ class SimpleCast
85
+ def initialize(cast)
86
+ @cast = cast
87
+ end
88
+
89
+ def new(**)
90
+ @cast
91
+ end
92
+
93
+ def ==(other)
94
+ other.is_a?(self.class) && other.cast == cast
95
+ end
96
+
97
+ protected
98
+
99
+ attr_reader :cast
100
+ end
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Sheetah
4
+ module Utils
5
+ class CellStringCleaner
6
+ garbage = "(?:[^[:print:]]|[[:space:]])+"
7
+ GARBAGE_PREFIX = /\A#{garbage}/
8
+ GARBAGE_SUFFIX = /#{garbage}\Z/
9
+ private_constant :GARBAGE_PREFIX, :GARBAGE_SUFFIX
10
+
11
+ def self.call(...)
12
+ DEFAULT.call(...)
13
+ end
14
+
15
+ def call(value)
16
+ value = value.dup
17
+
18
+ # TODO: benchmarks
19
+ value.sub!(GARBAGE_PREFIX, "")
20
+ value.sub!(GARBAGE_SUFFIX, "")
21
+
22
+ value
23
+ end
24
+
25
+ DEFAULT = new.freeze
26
+ private_constant :DEFAULT
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Sheetah
4
+ module Utils
5
+ module MonadicResult
6
+ # {Unit} is a singleton, and is used when there is no other meaningful
7
+ # value that could be returned.
8
+ #
9
+ # It allows the {Result} implementation to distinguish between *a null
10
+ # value* (i.e. `nil`) and *the lack of a value*, to provide adequate
11
+ # behavior in each case.
12
+ #
13
+ # The {Result} API should not expose {Unit} directly to its consumers.
14
+ #
15
+ # @see https://en.wikipedia.org/wiki/Unit_type
16
+ Unit = Object.new
17
+
18
+ def Unit.to_s
19
+ "Unit"
20
+ end
21
+
22
+ def Unit.inspect
23
+ "Unit"
24
+ end
25
+
26
+ Unit.freeze
27
+
28
+ DO_TOKEN = :MonadicResultDo
29
+ private_constant :DO_TOKEN
30
+
31
+ module Result
32
+ UnwrapError = Class.new(StandardError)
33
+ VariantError = Class.new(UnwrapError)
34
+ ValueError = Class.new(UnwrapError)
35
+
36
+ def initialize(value = Unit)
37
+ @wrapped = value
38
+ end
39
+
40
+ def empty?
41
+ wrapped == Unit
42
+ end
43
+
44
+ def ==(other)
45
+ other.is_a?(self.class) && other.wrapped == wrapped
46
+ end
47
+
48
+ def inspect
49
+ if empty?
50
+ "#{variant}()"
51
+ else
52
+ "#{variant}(#{wrapped.inspect})"
53
+ end
54
+ end
55
+
56
+ alias to_s inspect
57
+
58
+ def discard
59
+ empty? ? self : self.class.new
60
+ end
61
+
62
+ protected
63
+
64
+ attr_reader :wrapped
65
+
66
+ private
67
+
68
+ def value
69
+ raise ValueError, "There is no value within the result" if empty?
70
+
71
+ wrapped
72
+ end
73
+
74
+ def value?
75
+ wrapped unless empty?
76
+ end
77
+
78
+ def open
79
+ if empty?
80
+ yield
81
+ else
82
+ yield wrapped
83
+ end
84
+ end
85
+ end
86
+
87
+ class Success
88
+ include Result
89
+
90
+ def success?
91
+ true
92
+ end
93
+
94
+ def failure?
95
+ false
96
+ end
97
+
98
+ def success
99
+ value
100
+ end
101
+
102
+ def failure
103
+ raise VariantError, "Not a Failure"
104
+ end
105
+
106
+ def unwrap
107
+ value?
108
+ end
109
+
110
+ alias bind open
111
+ public :bind
112
+
113
+ alias or itself
114
+
115
+ private
116
+
117
+ def variant
118
+ "Success"
119
+ end
120
+ end
121
+
122
+ class Failure
123
+ include Result
124
+
125
+ def success?
126
+ false
127
+ end
128
+
129
+ def failure?
130
+ true
131
+ end
132
+
133
+ def success
134
+ raise VariantError, "Not a Success"
135
+ end
136
+
137
+ def failure
138
+ value
139
+ end
140
+
141
+ def unwrap
142
+ throw DO_TOKEN, self
143
+ end
144
+
145
+ alias bind itself
146
+
147
+ alias or open
148
+ public :or
149
+
150
+ private
151
+
152
+ def variant
153
+ "Failure"
154
+ end
155
+ end
156
+
157
+ # rubocop:disable Naming/MethodName
158
+
159
+ def Success(...)
160
+ Success.new(...)
161
+ end
162
+
163
+ def Failure(...)
164
+ Failure.new(...)
165
+ end
166
+
167
+ def Do(&block)
168
+ catch(DO_TOKEN, &block)
169
+ end
170
+
171
+ # rubocop:enable Naming/MethodName
172
+ end
173
+ end
174
+ end
data/lib/sheetah.rb ADDED
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # {Sheetah} is a library designed to process tabular data according to a
4
+ # {Sheetah::Template developer-defined structure}. It will turn each row into an
5
+ # object whose keys and types are specified by the structure.
6
+ #
7
+ # It can work with tabular data presented in different formats by delegating
8
+ # the parsing of documents to specialized backends
9
+ # ({Sheetah::Backends::Xlsx}, {Sheetah::Backends::Csv}, etc.).
10
+ #
11
+ # Given a tabular document and a specification of the document structure,
12
+ # Sheetah may process the document by handling the following tasks:
13
+ #
14
+ # - validation of the document's actual structure
15
+ # - arbitrarily complex typecasting of each row into a validated object,
16
+ # according to the document specification
17
+ # - fine-grained error handling (at the sheet/row/col/cell level)
18
+ # - all of the above done so that internationalization of messages is easy
19
+ #
20
+ # Sheetah is designed with memory efficiency in mind by processing documents
21
+ # one row at a time, thus not requiring parsing and loading the whole document
22
+ # in memory upfront (depending on the backend). The memory consumption of the
23
+ # library should therefore theoretically stay stable during the processing of a
24
+ # document, regardless of how many rows it may have.
25
+ module Sheetah
26
+ end
27
+
28
+ require "sheetah/template"
29
+ require "sheetah/template_config"
30
+ require "sheetah/sheet_processor"
31
+ require "sheetah/backends/wrapper"
metadata ADDED
@@ -0,0 +1,118 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tabulard
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Erwan Thomas
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-06-05 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - id@maen.fr
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - LICENSE
21
+ - README.md
22
+ - VERSION
23
+ - lib/sheetah.rb
24
+ - lib/sheetah/attribute.rb
25
+ - lib/sheetah/attribute_types.rb
26
+ - lib/sheetah/attribute_types/composite.rb
27
+ - lib/sheetah/attribute_types/scalar.rb
28
+ - lib/sheetah/attribute_types/value.rb
29
+ - lib/sheetah/attribute_types/value.rb.orig
30
+ - lib/sheetah/backends.rb
31
+ - lib/sheetah/backends/csv.rb
32
+ - lib/sheetah/backends/wrapper.rb
33
+ - lib/sheetah/backends/xlsx.rb
34
+ - lib/sheetah/column.rb
35
+ - lib/sheetah/errors/error.rb
36
+ - lib/sheetah/errors/spec_error.rb
37
+ - lib/sheetah/errors/type_error.rb
38
+ - lib/sheetah/frozen.rb
39
+ - lib/sheetah/headers.rb
40
+ - lib/sheetah/messaging.rb
41
+ - lib/sheetah/messaging/config.rb
42
+ - lib/sheetah/messaging/constants.rb
43
+ - lib/sheetah/messaging/message.rb
44
+ - lib/sheetah/messaging/message_variant.rb
45
+ - lib/sheetah/messaging/messages/cleaned_string.rb
46
+ - lib/sheetah/messaging/messages/duplicated_header.rb
47
+ - lib/sheetah/messaging/messages/invalid_header.rb
48
+ - lib/sheetah/messaging/messages/missing_column.rb
49
+ - lib/sheetah/messaging/messages/must_be_array.rb
50
+ - lib/sheetah/messaging/messages/must_be_boolsy.rb
51
+ - lib/sheetah/messaging/messages/must_be_date.rb
52
+ - lib/sheetah/messaging/messages/must_be_email.rb
53
+ - lib/sheetah/messaging/messages/must_be_string.rb
54
+ - lib/sheetah/messaging/messages/must_exist.rb
55
+ - lib/sheetah/messaging/messages/sheet_error.rb
56
+ - lib/sheetah/messaging/messenger.rb
57
+ - lib/sheetah/messaging/validations.rb
58
+ - lib/sheetah/messaging/validations/base_validator.rb
59
+ - lib/sheetah/messaging/validations/dsl.rb
60
+ - lib/sheetah/messaging/validations/invalid_message.rb
61
+ - lib/sheetah/messaging/validations/mixins.rb
62
+ - lib/sheetah/row_processor.rb
63
+ - lib/sheetah/row_processor_result.rb
64
+ - lib/sheetah/row_value_builder.rb
65
+ - lib/sheetah/sheet.rb
66
+ - lib/sheetah/sheet/col_converter.rb
67
+ - lib/sheetah/sheet_processor.rb
68
+ - lib/sheetah/sheet_processor_result.rb
69
+ - lib/sheetah/specification.rb
70
+ - lib/sheetah/template.rb
71
+ - lib/sheetah/template_config.rb
72
+ - lib/sheetah/types/cast.rb
73
+ - lib/sheetah/types/cast_chain.rb
74
+ - lib/sheetah/types/composites/array.rb
75
+ - lib/sheetah/types/composites/array_compact.rb
76
+ - lib/sheetah/types/composites/composite.rb
77
+ - lib/sheetah/types/container.rb
78
+ - lib/sheetah/types/scalars/boolsy.rb
79
+ - lib/sheetah/types/scalars/boolsy_cast.rb
80
+ - lib/sheetah/types/scalars/date_string.rb
81
+ - lib/sheetah/types/scalars/date_string_cast.rb
82
+ - lib/sheetah/types/scalars/email.rb
83
+ - lib/sheetah/types/scalars/email_cast.rb
84
+ - lib/sheetah/types/scalars/scalar.rb
85
+ - lib/sheetah/types/scalars/scalar_cast.rb
86
+ - lib/sheetah/types/scalars/string.rb
87
+ - lib/sheetah/types/type.rb
88
+ - lib/sheetah/utils/cell_string_cleaner.rb
89
+ - lib/sheetah/utils/monadic_result.rb
90
+ homepage: https://github.com/tabulard/tabulard
91
+ licenses:
92
+ - Apache-2.0
93
+ metadata:
94
+ source_code_uri: https://github.com/tabulard/tabulard
95
+ bug_tracker_uri: https://github.com/tabulard/tabulard/issues
96
+ changelog_uri: https://github.com/tabulard/tabulard/blob/master/CHANGELOG.md
97
+ documentation_uri: https://github.com/tabulard/tabulard/blob/master/README.md
98
+ rubygems_mfa_required: 'true'
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: '3.0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ requirements: []
114
+ rubygems_version: 3.5.11
115
+ signing_key:
116
+ specification_version: 4
117
+ summary: A highly-customizable tabular data processor
118
+ test_files: []