edifact_rails 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1343d4c915c6df4b716c31560cf58d5b350dc18ad074c12c3c562b13b0ce3717
4
- data.tar.gz: 6067a62e4bdf24f2be92306a6aa4d4e21b9aa3b69ac6eff3eba8a8ae97229627
3
+ metadata.gz: 3c51652b41747f2b0c07ecd834c5c79b3bebbb1537464a450f37ee58ea68150e
4
+ data.tar.gz: 2669313b30c7565c60f4dc568c528127577770946f7d4a8ffee7a1fc87f4c07e
5
5
  SHA512:
6
- metadata.gz: 8e1ce36c70da1e16fce1596fd43b345f5a284a77b82645b901d6eb88f25ae45c45d152aae760477526bd1c0230f59890f602c40fbded3cd3be61a80a8926e1c7
7
- data.tar.gz: 2feffb89e9df203d37eb0f8aab6bcb673630dd637092a5fe406de9eff0d244d074fd964b1d10e7c68a75a48b9f75f58168240d42e7c2d900f469468be0a049d5
6
+ metadata.gz: ded47109a99423254023e4f7316e1bdc3d1dc6602e73225a900c815baa2b2c4d9d14fe16f131aa5e1239b6b49ecab6394d7051ec9fc66923c6ffb5eb0e674ba5
7
+ data.tar.gz: f3c9d7ae8f793651c62eff11b3356321f6bcc531b5ce61020f23d530a08b0112d46e338edc8ff1711bc514fb53875508b5792795c3cbea8056e5ee97a60e52f1
data/CHANGELOG.md CHANGED
@@ -7,3 +7,17 @@
7
7
  ## 1.1.0 (27/04/2023)
8
8
 
9
9
  * Added support for TRADACOMS input
10
+
11
+ ## 1.1.1 (4/05/2023)
12
+
13
+ * Fixed crash caused by running the gem in a production environment.
14
+
15
+ ## 1.2.0 (31/05/2024)
16
+
17
+ * Added support for UNA segments. Special characters different from the defaults can now be used.
18
+ * Added `#una_special_characters` method that returns just the special characters.
19
+
20
+ ## 1.2.1 (4/06/2024)
21
+
22
+ * `#una_special_characters` method now also returns decimal notation character, default `.`.
23
+ * `#una_special_characters` method can now take no arguments, and will return the default special characters if so.
data/README.md CHANGED
@@ -4,7 +4,7 @@ This gem parses EDIFACT or TRADACOMS input, and converts it into a ruby array st
4
4
 
5
5
  It does not handle validation itself.
6
6
 
7
- This gem is heavily inspired by and attempts to output similar results as [edifact_parser](https://github.com/pvdvreede/edifact_parser), credits to [pvdvreede](https://github.com/pvdvreede)
7
+ This gem is heavily inspired by and attempts to output similar results as [edifact_parser](https://github.com/pvdvreede/edifact_parser)
8
8
 
9
9
  ## Requirements
10
10
 
@@ -15,12 +15,12 @@ This gem has been tested on the following ruby versions:
15
15
  * 3.1.2
16
16
  * 3.2.2
17
17
 
18
- ## Getting started
18
+ ## Getting started
19
19
 
20
20
  In your `Gemfile`:
21
21
 
22
22
  ```ruby
23
- gem 'edifact_rails', '~> 1.0'
23
+ gem 'edifact_rails', '~> 1.2'
24
24
  ```
25
25
 
26
26
  Otherwise:
@@ -47,6 +47,19 @@ ruby_array = EdifactRails.parse_file("your/file/path")
47
47
  ruby_array = EdifactRails.parse("LIN+1+1+0764569104:IB'QTY+1:25'")
48
48
  ```
49
49
 
50
+ You can pull just the special characters from the UNA segment (or the defaults if no UNA segment is present):
51
+ ```ruby
52
+ una_special_characters = EdifactRails.una_special_characters(your_string_input)
53
+ # una_special_characters =>
54
+ {
55
+ component_data_element_seperator: ":",
56
+ data_element_seperator: "+",
57
+ decimal_notation: ".",
58
+ escape_character: "?",
59
+ segment_seperator: "'"
60
+ }
61
+ ```
62
+
50
63
  ## Output
51
64
 
52
65
  ### EDIFACT
@@ -2,48 +2,93 @@
2
2
 
3
3
  module EdifactRails
4
4
  class Parser
5
- ESCAPE_CHARACTER = "?"
6
- SEGMENT_SEPARATOR = "'"
7
- DATA_ELEMENT_SEPARATOR = "+"
8
- COMPONENT_DATA_ELEMENT_SEPARATOR = ":"
9
-
10
5
  def initialize
11
- # Escape the special characters for use in regex later on
12
- @escape_char_rx = Regexp.quote(EdifactRails::Parser::ESCAPE_CHARACTER)
13
- @segment_separator_rx = Regexp.quote(EdifactRails::Parser::SEGMENT_SEPARATOR)
14
- @data_element_separator_rx = Regexp.quote(EdifactRails::Parser::DATA_ELEMENT_SEPARATOR)
15
- @component_data_element_separator_rx = Regexp.quote(EdifactRails::Parser::COMPONENT_DATA_ELEMENT_SEPARATOR)
6
+ # Set default separators
7
+ # They can be overridden by the UNA segment in #detect_special_characters
8
+ set_special_characters
16
9
  end
17
10
 
18
- # Treat the input a little, split the input string into segments, parse them
11
+ # Treat the input, split the input string into segments, parse those segments
19
12
  def parse(string)
13
+ # Trim newlines and excess spaces around those newlines
14
+ string = string.gsub(/\s*\n\s*/, "")
15
+
16
+ # Check for UNA segment, update special characters if so
17
+ detect_special_characters(string)
18
+
19
+ # Does some funky regex manipulation to handle escaped special characters
20
20
  string = treat_input(string)
21
21
 
22
22
  # Split the input string into segments
23
- segments = string.split(/(?<!#{@escape_char_rx})#{@segment_separator_rx}/)
23
+ segments = string.split(/(?<!#{Regexp.quote(@escape_character)})#{Regexp.quote(@segment_seperator)}/)
24
24
 
25
25
  # Detect if the input is a tradacoms file
26
26
  @is_tradacoms = segments.map { |s| s[3] }.uniq == ["="]
27
27
 
28
- # Drop the UNA segment, if present
28
+ # Drop the UNA segment, if present (we have already dealt with it in #detect_special_characters)
29
29
  segments.reject! { |s| s[0..2] == "UNA" }
30
30
 
31
31
  # Parse the segments
32
32
  segments.map { |segment| parse_segment(segment) }
33
33
  end
34
34
 
35
+ # Given an input string, return the special characters as defined by the UNA segment
36
+ # If no UNA segment is present, returns the default special characters
37
+ def una_special_characters(string)
38
+ detect_special_characters(string)
39
+
40
+ {
41
+ component_data_element_seperator: @component_data_element_seperator,
42
+ data_element_seperator: @data_element_seperator,
43
+ decimal_notation: @decimal_notation,
44
+ escape_character: @escape_character,
45
+ segment_seperator: @segment_seperator
46
+ }
47
+ end
48
+
35
49
  private
36
50
 
37
- def treat_input(string)
38
- # Trim newlines and excess spaces around those newlines
39
- string = string.gsub(/\s*\n\s*/, "")
51
+ def set_special_characters(
52
+ component_data_element_seperator =
53
+ EdifactRails::DEFAULT_SPECIAL_CHARACTERS[:component_data_element_seperator],
54
+ data_element_seperator = EdifactRails::DEFAULT_SPECIAL_CHARACTERS[:data_element_seperator],
55
+ decimal_notation = EdifactRails::DEFAULT_SPECIAL_CHARACTERS[:decimal_notation],
56
+ escape_character = EdifactRails::DEFAULT_SPECIAL_CHARACTERS[:escape_character],
57
+ segment_seperator = EdifactRails::DEFAULT_SPECIAL_CHARACTERS[:segment_seperator]
58
+ )
59
+ # Set the special characters
60
+ @component_data_element_seperator = component_data_element_seperator
61
+ @data_element_seperator = data_element_seperator
62
+ @decimal_notation = decimal_notation
63
+ @escape_character = escape_character
64
+ @segment_seperator = segment_seperator
65
+ end
66
+
67
+ def detect_special_characters(string)
68
+ # UNA tags must be at the start of the input otherwise they are ignored
69
+ return unless string[0..2] == "UNA"
40
70
 
71
+ # UNA segments look like this:
72
+ #
73
+ # UNA:+.? '
74
+ #
75
+ # UNA followed by 6 special characters which are, in order:
76
+ # 1. Component data element separator
77
+ # 2. Data element separator
78
+ # 3. Decimal notation (must be . or ,)
79
+ # 4. Release character (aka escape character)
80
+ # 5. Reserved for future use, so always a space for now
81
+ # 6. Segment terminator
82
+ set_special_characters(string[3], string[4], string[5], string[6], string[8])
83
+ end
84
+
85
+ def treat_input(string)
41
86
  # Prepare regex
42
87
  other_specials_rx = Regexp.quote(
43
88
  [
44
- EdifactRails::Parser::SEGMENT_SEPARATOR,
45
- EdifactRails::Parser::DATA_ELEMENT_SEPARATOR,
46
- EdifactRails::Parser::COMPONENT_DATA_ELEMENT_SEPARATOR
89
+ @segment_seperator,
90
+ @data_element_seperator,
91
+ @component_data_element_seperator
47
92
  ].join
48
93
  )
49
94
 
@@ -64,7 +109,10 @@ module EdifactRails
64
109
  #
65
110
  # "LIN+even????+123" => '+' is not escaped, gsub'ed => "even???? +123" => parsed => ['LIN', ['even??'], [123]]
66
111
  # "LIN+odd???+123" => '+' is escaped, not gsub'ed => "odd???+123" => parsed => ['LIN', ['odd?+123']]
67
- string.gsub(/(?<!#{@escape_char_rx})((#{@escape_char_rx}{2})+)([#{other_specials_rx}])/, '\1 \3')
112
+ string.gsub(
113
+ /(?<!#{Regexp.quote(@escape_character)})((#{Regexp.quote(@escape_character)}{2})+)([#{other_specials_rx}])/,
114
+ '\1 \3'
115
+ )
68
116
  end
69
117
 
70
118
  # Split the segment into data elements, take the first as the tag, then parse the rest
@@ -72,10 +120,10 @@ module EdifactRails
72
120
  # If the input is a tradacoms file, the segment tag will be followed by '=' instead of '+'
73
121
  # 'QTY=1+A:B' instead of 'QTY+1+A:B'
74
122
  # Fortunately, this is easily handled by simply changing these "="s into "+"s before the split
75
- segment[3] = EdifactRails::Parser::DATA_ELEMENT_SEPARATOR if @is_tradacoms && segment.length >= 4
123
+ segment[3] = @data_element_seperator if @is_tradacoms && segment.length >= 4
76
124
 
77
125
  # Segments are made up of data elements
78
- data_elements = segment.split(/(?<!#{@escape_char_rx})#{@data_element_separator_rx}/)
126
+ data_elements = segment.split(/(?<!#{Regexp.quote(@escape_character)})#{Regexp.quote(@data_element_seperator)}/)
79
127
 
80
128
  # The first element is the tag, pop it off
81
129
  parsed_segment = []
@@ -88,7 +136,8 @@ module EdifactRails
88
136
  # Split the data elements into component data elements, and treat them
89
137
  def parse_data_element(element)
90
138
  # Split data element into components
91
- components = element.split(/(?<!#{@escape_char_rx})#{@component_data_element_separator_rx}/)
139
+ components =
140
+ element.split(/(?<!#{Regexp.quote(@escape_character)})#{Regexp.quote(@component_data_element_seperator)}/)
92
141
 
93
142
  components.map { |component| treat_component(component) }
94
143
  end
@@ -99,16 +148,16 @@ module EdifactRails
99
148
  component.strip!
100
149
 
101
150
  # Prepare regex
102
- all_special_characters = [
103
- EdifactRails::Parser::SEGMENT_SEPARATOR,
104
- EdifactRails::Parser::DATA_ELEMENT_SEPARATOR,
105
- EdifactRails::Parser::COMPONENT_DATA_ELEMENT_SEPARATOR,
106
- EdifactRails::Parser::ESCAPE_CHARACTER
151
+ all_special_characters_string = [
152
+ @segment_seperator,
153
+ @data_element_seperator,
154
+ @component_data_element_seperator,
155
+ @escape_character
107
156
  ].join
108
157
 
109
158
  # If the component has escaped characters in it, remove the escape character and return the character as is
110
159
  # "?+" -> "+", "??" -> "?"
111
- component.gsub!(/#{@escape_char_rx}([#{Regexp.quote(all_special_characters)}])/, '\1')
160
+ component.gsub!(/#{Regexp.quote(@escape_character)}([#{Regexp.quote(all_special_characters_string)}])/, '\1')
112
161
 
113
162
  # Convert empty strings to nils
114
163
  component = nil if component.empty?
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EdifactRails
4
- VERSION = "1.1.1"
4
+ VERSION = "1.2.1"
5
5
  end
data/lib/edifact_rails.rb CHANGED
@@ -3,12 +3,25 @@
3
3
  require "edifact_rails/parser"
4
4
 
5
5
  module EdifactRails
6
+ DEFAULT_SPECIAL_CHARACTERS = {
7
+ component_data_element_seperator: ":",
8
+ data_element_seperator: "+",
9
+ decimal_notation: ".",
10
+ escape_character: "?",
11
+ segment_seperator: "'"
12
+ }.freeze
13
+
6
14
  def self.parse(string)
7
15
  parser = EdifactRails::Parser.new
8
- parser.parse string
16
+ parser.parse(string)
9
17
  end
10
18
 
11
19
  def self.parse_file(file_path)
12
20
  parse(File.read(file_path).split("\n").join)
13
21
  end
22
+
23
+ def self.una_special_characters(string = '')
24
+ parser = EdifactRails::Parser.new
25
+ parser.una_special_characters(string)
26
+ end
14
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: edifact_rails
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Blackwood
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-04 00:00:00.000000000 Z
11
+ date: 2024-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug