RubyGems - edifact_rails - Versions diffs - 1.1.1 → 1.2.1 - Mend

edifact_rails 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +14 -0
data/README.md +16 -3
data/lib/edifact_rails/parser.rb +78 -29
data/lib/edifact_rails/version.rb +1 -1
data/lib/edifact_rails.rb +14 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1343d4c915c6df4b716c31560cf58d5b350dc18ad074c12c3c562b13b0ce3717
-  data.tar.gz: 6067a62e4bdf24f2be92306a6aa4d4e21b9aa3b69ac6eff3eba8a8ae97229627
+  metadata.gz: 3c51652b41747f2b0c07ecd834c5c79b3bebbb1537464a450f37ee58ea68150e
+  data.tar.gz: 2669313b30c7565c60f4dc568c528127577770946f7d4a8ffee7a1fc87f4c07e
 SHA512:
-  metadata.gz: 8e1ce36c70da1e16fce1596fd43b345f5a284a77b82645b901d6eb88f25ae45c45d152aae760477526bd1c0230f59890f602c40fbded3cd3be61a80a8926e1c7
-  data.tar.gz: 2feffb89e9df203d37eb0f8aab6bcb673630dd637092a5fe406de9eff0d244d074fd964b1d10e7c68a75a48b9f75f58168240d42e7c2d900f469468be0a049d5
+  metadata.gz: ded47109a99423254023e4f7316e1bdc3d1dc6602e73225a900c815baa2b2c4d9d14fe16f131aa5e1239b6b49ecab6394d7051ec9fc66923c6ffb5eb0e674ba5
+  data.tar.gz: f3c9d7ae8f793651c62eff11b3356321f6bcc531b5ce61020f23d530a08b0112d46e338edc8ff1711bc514fb53875508b5792795c3cbea8056e5ee97a60e52f1

data/CHANGELOG.md CHANGED Viewed

@@ -7,3 +7,17 @@
 ## 1.1.0 (27/04/2023)
 * Added support for TRADACOMS input
+## 1.1.1 (4/05/2023)
+* Fixed crash caused by running the gem in a production environment.
+## 1.2.0 (31/05/2024)
+* Added support for UNA segments. Special characters different from the defaults can now be used.
+* Added `#una_special_characters` method that returns just the special characters.
+## 1.2.1 (4/06/2024)
+* `#una_special_characters` method now also returns decimal notation character, default `.`.
+* `#una_special_characters` method can now take no arguments, and will return the default special characters if so.

data/README.md CHANGED Viewed

@@ -4,7 +4,7 @@ This gem parses EDIFACT or TRADACOMS input, and converts it into a ruby array st
 It does not handle validation itself.
-This gem is heavily inspired by and attempts to output similar results as [edifact_parser](https://github.com/pvdvreede/edifact_parser), credits to [pvdvreede](https://github.com/pvdvreede)
+This gem is heavily inspired by and attempts to output similar results as [edifact_parser](https://github.com/pvdvreede/edifact_parser)
 ## Requirements
@@ -15,12 +15,12 @@ This gem has been tested on the following ruby versions:
 * 3.1.2
 * 3.2.2
-## Getting started
+## Getting started
 In your `Gemfile`:
 ```ruby
-gem 'edifact_rails', '~> 1.0'
+gem 'edifact_rails', '~> 1.2'
 ```
 Otherwise:
@@ -47,6 +47,19 @@ ruby_array = EdifactRails.parse_file("your/file/path")
 ruby_array = EdifactRails.parse("LIN+1+1+0764569104:IB'QTY+1:25'")
 ```
+You can pull just the special characters from the UNA segment (or the defaults if no UNA segment is present):
+```ruby
+una_special_characters = EdifactRails.una_special_characters(your_string_input)
+# una_special_characters =>
+{
+  component_data_element_seperator: ":",
+  data_element_seperator: "+",
+  decimal_notation: ".",
+  escape_character: "?",
+  segment_seperator: "'"
+}
+```
 ## Output
 ### EDIFACT

data/lib/edifact_rails/parser.rb CHANGED Viewed

@@ -2,48 +2,93 @@
 module EdifactRails
   class Parser
-    ESCAPE_CHARACTER = "?"
-    SEGMENT_SEPARATOR = "'"
-    DATA_ELEMENT_SEPARATOR = "+"
-    COMPONENT_DATA_ELEMENT_SEPARATOR = ":"
     def initialize
-      # Escape the special characters for use in regex later on
-      @escape_char_rx = Regexp.quote(EdifactRails::Parser::ESCAPE_CHARACTER)
-      @segment_separator_rx = Regexp.quote(EdifactRails::Parser::SEGMENT_SEPARATOR)
-      @data_element_separator_rx = Regexp.quote(EdifactRails::Parser::DATA_ELEMENT_SEPARATOR)
-      @component_data_element_separator_rx = Regexp.quote(EdifactRails::Parser::COMPONENT_DATA_ELEMENT_SEPARATOR)
+      # Set default separators
+      # They can be overridden by the UNA segment in #detect_special_characters
+      set_special_characters
     end
-    # Treat the input a little, split the input string into segments, parse them
+    # Treat the input, split the input string into segments, parse those segments
     def parse(string)
+      # Trim newlines and excess spaces around those newlines
+      string = string.gsub(/\s*\n\s*/, "")
+      # Check for UNA segment, update special characters if so
+      detect_special_characters(string)
+      # Does some funky regex manipulation to handle escaped special characters
       string = treat_input(string)
       # Split the input string into segments
-      segments = string.split(/(?<!#{@escape_char_rx})#{@segment_separator_rx}/)
+      segments = string.split(/(?<!#{Regexp.quote(@escape_character)})#{Regexp.quote(@segment_seperator)}/)
       # Detect if the input is a tradacoms file
       @is_tradacoms = segments.map { |s| s[3] }.uniq == ["="]
-      # Drop the UNA segment, if present
+      # Drop the UNA segment, if present (we have already dealt with it in #detect_special_characters)
       segments.reject! { |s| s[0..2] == "UNA" }
       # Parse the segments
       segments.map { |segment| parse_segment(segment) }
     end
+    # Given an input string, return the special characters as defined by the UNA segment
+    # If no UNA segment is present, returns the default special characters
+    def una_special_characters(string)
+      detect_special_characters(string)
+      {
+        component_data_element_seperator: @component_data_element_seperator,
+        data_element_seperator: @data_element_seperator,
+        decimal_notation: @decimal_notation,
+        escape_character: @escape_character,
+        segment_seperator: @segment_seperator
+      }
+    end
     private
-    def treat_input(string)
-      # Trim newlines and excess spaces around those newlines
-      string = string.gsub(/\s*\n\s*/, "")
+    def set_special_characters(
+      component_data_element_seperator =
+        EdifactRails::DEFAULT_SPECIAL_CHARACTERS[:component_data_element_seperator],
+      data_element_seperator = EdifactRails::DEFAULT_SPECIAL_CHARACTERS[:data_element_seperator],
+      decimal_notation = EdifactRails::DEFAULT_SPECIAL_CHARACTERS[:decimal_notation],
+      escape_character = EdifactRails::DEFAULT_SPECIAL_CHARACTERS[:escape_character],
+      segment_seperator = EdifactRails::DEFAULT_SPECIAL_CHARACTERS[:segment_seperator]
+    )
+      # Set the special characters
+      @component_data_element_seperator = component_data_element_seperator
+      @data_element_seperator = data_element_seperator
+      @decimal_notation = decimal_notation
+      @escape_character = escape_character
+      @segment_seperator = segment_seperator
+    end
+    def detect_special_characters(string)
+      # UNA tags must be at the start of the input otherwise they are ignored
+      return unless string[0..2] == "UNA"
+      # UNA segments look like this:
+      #
+      # UNA:+.? '
+      #
+      # UNA followed by 6 special characters which are, in order:
+      # 1. Component data element separator
+      # 2. Data element separator
+      # 3. Decimal notation (must be . or ,)
+      # 4. Release character (aka escape character)
+      # 5. Reserved for future use, so always a space for now
+      # 6. Segment terminator
+      set_special_characters(string[3], string[4], string[5], string[6], string[8])
+    end
+    def treat_input(string)
       # Prepare regex
       other_specials_rx = Regexp.quote(
         [
-          EdifactRails::Parser::SEGMENT_SEPARATOR,
-          EdifactRails::Parser::DATA_ELEMENT_SEPARATOR,
-          EdifactRails::Parser::COMPONENT_DATA_ELEMENT_SEPARATOR
+          @segment_seperator,
+          @data_element_seperator,
+          @component_data_element_seperator
         ].join
       )
@@ -64,7 +109,10 @@ module EdifactRails
       #
       # "LIN+even????+123" => '+' is not escaped, gsub'ed => "even???? +123" => parsed => ['LIN', ['even??'], [123]]
       # "LIN+odd???+123" => '+' is escaped, not gsub'ed => "odd???+123" => parsed => ['LIN', ['odd?+123']]
-      string.gsub(/(?<!#{@escape_char_rx})((#{@escape_char_rx}{2})+)([#{other_specials_rx}])/, '\1 \3')
+      string.gsub(
+        /(?<!#{Regexp.quote(@escape_character)})((#{Regexp.quote(@escape_character)}{2})+)([#{other_specials_rx}])/,
+        '\1 \3'
+      )
     end
     # Split the segment into data elements, take the first as the tag, then parse the rest
@@ -72,10 +120,10 @@ module EdifactRails
       # If the input is a tradacoms file, the segment tag will be followed by '=' instead of '+'
       # 'QTY=1+A:B' instead of 'QTY+1+A:B'
       # Fortunately, this is easily handled by simply changing these "="s into "+"s before the split
-      segment[3] = EdifactRails::Parser::DATA_ELEMENT_SEPARATOR if @is_tradacoms && segment.length >= 4
+      segment[3] = @data_element_seperator if @is_tradacoms && segment.length >= 4
       # Segments are made up of data elements
-      data_elements = segment.split(/(?<!#{@escape_char_rx})#{@data_element_separator_rx}/)
+      data_elements = segment.split(/(?<!#{Regexp.quote(@escape_character)})#{Regexp.quote(@data_element_seperator)}/)
       # The first element is the tag, pop it off
       parsed_segment = []
@@ -88,7 +136,8 @@ module EdifactRails
     # Split the data elements into component data elements, and treat them
     def parse_data_element(element)
       # Split data element into components
-      components = element.split(/(?<!#{@escape_char_rx})#{@component_data_element_separator_rx}/)
+      components =
+        element.split(/(?<!#{Regexp.quote(@escape_character)})#{Regexp.quote(@component_data_element_seperator)}/)
       components.map { |component| treat_component(component) }
     end
@@ -99,16 +148,16 @@ module EdifactRails
       component.strip!
       # Prepare regex
-      all_special_characters = [
-        EdifactRails::Parser::SEGMENT_SEPARATOR,
-        EdifactRails::Parser::DATA_ELEMENT_SEPARATOR,
-        EdifactRails::Parser::COMPONENT_DATA_ELEMENT_SEPARATOR,
-        EdifactRails::Parser::ESCAPE_CHARACTER
+      all_special_characters_string = [
+        @segment_seperator,
+        @data_element_seperator,
+        @component_data_element_seperator,
+        @escape_character
       ].join
       # If the component has escaped characters in it, remove the escape character and return the character as is
       # "?+" -> "+", "??" -> "?"
-      component.gsub!(/#{@escape_char_rx}([#{Regexp.quote(all_special_characters)}])/, '\1')
+      component.gsub!(/#{Regexp.quote(@escape_character)}([#{Regexp.quote(all_special_characters_string)}])/, '\1')
       # Convert empty strings to nils
       component = nil if component.empty?

data/lib/edifact_rails/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module EdifactRails
-  VERSION = "1.1.1"
+  VERSION = "1.2.1"
 end

data/lib/edifact_rails.rb CHANGED Viewed

@@ -3,12 +3,25 @@
 require "edifact_rails/parser"
 module EdifactRails
+  DEFAULT_SPECIAL_CHARACTERS = {
+    component_data_element_seperator: ":",
+    data_element_seperator: "+",
+    decimal_notation: ".",
+    escape_character: "?",
+    segment_seperator: "'"
+  }.freeze
   def self.parse(string)
     parser = EdifactRails::Parser.new
-    parser.parse string
+    parser.parse(string)
   end
   def self.parse_file(file_path)
     parse(File.read(file_path).split("\n").join)
   end
+  def self.una_special_characters(string = '')
+    parser = EdifactRails::Parser.new
+    parser.una_special_characters(string)
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: edifact_rails
 version: !ruby/object:Gem::Version
-  version: 1.1.1
+  version: 1.2.1
 platform: ruby
 authors:
 - David Blackwood
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-05-04 00:00:00.000000000 Z
+date: 2024-06-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: byebug