RubyGems - immosquare-yaml - Versions diffs - 0.1.28 → 1.0.0 - Mend

immosquare-yaml 0.1.28 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

checksums.yaml +4 -4
data/lib/immosquare-yaml/version.rb +1 -1
data/lib/immosquare-yaml.rb +195 -637
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a3b992a868778a9c56accd519704c486fe36d61628d99f5494f92f0400f1cde5
-  data.tar.gz: ff1f1567cd7bd741a73f10ce5378375e0d464da5a33d7a4cf7e332d4c3c602a6
+  metadata.gz: 3a23201af8649660878ead589649938df652d4c3b4eccd8b5d2463643e7d0b79
+  data.tar.gz: 4ee0383267d4a9f5e718d0189395f043a67d663abb43409f5b1f35782e76768e
 SHA512:
-  metadata.gz: 8bcef442f5fe2c707f674807421071a3d8c948569f753bedcb2b77002fbe9c8c076ce000a1b461fae25f3228cfed282c25cdd2d461ae0f02cb9e2664fdb3633a
-  data.tar.gz: 98f6c75b116d1bb6d216fb9d5fab628b2c7a1b6894e0b1d7d8309c2b351ad10a3c06311c365b94b6389ed5659d83bf90d48941c1e774effc33940e67e43c70da
+  metadata.gz: c349896c32e18ce5fe66b8c00cc33f7e55828ddc48a4dd4e69977ea1cad46940c06186ac043574b514ed8d36357233f7442a4749c2215bca7e8c80878883a4c7
+  data.tar.gz: 2c0a872feb19356fca0895f00c1bc49eeba7bb6a40dab31e9710fd3a6b062d4e35b2cb0bee7b8953cc86bc0dcdab68bd77feb7eba9435b514d11a62ff030c5fe

data/lib/immosquare-yaml/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module ImmosquareYaml
-  VERSION = "0.1.28".freeze
+  VERSION = "1.0.0".freeze
 end

data/lib/immosquare-yaml.rb CHANGED Viewed

@@ -1,6 +1,4 @@
-require          "English"
 require          "psych"
-require          "date"
 require          "fileutils"
 require          "immosquare-extensions"
 require_relative "immosquare-yaml/configuration"
@@ -8,19 +6,27 @@ require_relative "immosquare-yaml/shared_methods"
 require_relative "immosquare-yaml/railtie" if defined?(Rails)
 ##============================================================##
-## Importing the 'English' library allows us to use more human-readable
-## global variables, such as $INPUT_RECORD_SEPARATOR instead of $/,
-## which enhances code clarity and makes it easier to understand
-## the purpose of these variables in our code.
+## ImmosquareYaml — post-processeur Psych dédié aux fichiers
+## de traduction (locales Rails).
+##
+## Trois responsabilités :
+##   - parse(file)  : YAML → Hash, en s'appuyant sur l'AST Psych
+##   - dump(hash)   : Hash → YAML formaté (quotes minimales,
+##                    blocs littéraux, emojis décodés)
+##   - clean(file)  : parse + tri par clé + dump → écrit
+##
+## La gem résout cinq problèmes que Psych seul ne traite pas :
+##   1. Norway problem (yes/no/on/off lus comme String)
+##   2. Tri déterministe par clé
+##   3. Préservation des blocs littéraux (|, |-)
+##   4. Quotes minimales pour la lisibilité
+##   5. Décodage des escapes \U0001F600 → emoji
 ##============================================================##
 module ImmosquareYaml
   extend SharedMethods
   class << self
-    ##============================================================##
-    ## Gem configuration
-    ##============================================================##
     attr_writer :configuration
     def configuration
@@ -32,67 +38,27 @@ module ImmosquareYaml
     end
     ##============================================================##
-    ## This method cleans a specified YAML file by processing it line by line.
-    ## It executes a comprehensive cleaning routine, which involves parsing the
-    ## YAML content to a hash, optionally sorting it, and then dumping it back
-    ## to a YAML format.
-    ##
-    ## Params:
-    ## +file_path+:: Path to the YAML file that needs to be cleaned.
-    ## +options+:: A hash of options where :sort controls whether the output should be sorted (default is true).
-    ##
-    ## Returns:
-    ## Boolean indicating the success (true) or failure (false) of the operation.
+    ## clean(file_path, sort: true, output: file_path)
+    ## Charge le fichier, le re-écrit propre et trié.
+    ## Retourne true / false selon le succès.
     ##============================================================##
     def clean(file_path, **options)
-      ##============================================================##
-      ## Default options
-      ##============================================================##
       options = {
         :sort   => true,
         :output => file_path
       }.merge(options)
       begin
-        output_file_path = nil
         raise("File not found") if !File.exist?(file_path)
-        ##============================================================##
-        ## Setup variables
-        ##============================================================##
-        output_file_path = options[:output]
-        ##============================================================##
-        ## Backup original content for restoration after parsing if necessary
-        ##============================================================##
-        original_content = File.read(file_path) if output_file_path != file_path
-        ##============================================================##
-        ## The cleaning procedure is initialized with a comprehensive clean, transforming
-        ## the YAML content to a hash to facilitate optional sorting, before
-        ## rewriting it to the YAML file in its cleaned and optionally sorted state.
-        ##============================================================##
-        clean_yml(file_path)
-        parsed_yml = parse(file_path)
-        parsed_yml = parsed_yml.sort_by_key
-        parsed_yml = dump(parsed_yml)
-        ##============================================================##
-        ## Restore original content if necessary
-        ##============================================================##
-        File.write(file_path, original_content) if output_file_path != file_path
+        parsed_yml = parse(file_path, :sort => options[:sort])
+        return false if parsed_yml == false
-        ##============================================================##
-        ## Write the cleaned YAML content to the specified output file
-        ##============================================================##
-        FileUtils.mkdir_p(File.dirname(output_file_path))
-        File.write(output_file_path, parsed_yml)
+        output = dump(parsed_yml)
+        FileUtils.mkdir_p(File.dirname(options[:output]))
+        File.write(options[:output], output)
         true
       rescue StandardError => e
-        ##============================================================##
-        ## Restore original content if necessary
-        ##============================================================##
-        File.write(file_path, original_content) if output_file_path != file_path && !original_content.nil?
         puts(e.message)
         puts(e.backtrace)
         false
@@ -100,55 +66,39 @@ module ImmosquareYaml
     end
     ##============================================================##
-    ## This method parses a specified YAML file, carrying out a preliminary
-    ## cleaning operation to ensure a smooth parsing process. Following this,
-    ## the cleaned file is transformed into a hash, which can optionally be sorted.
-    ## It operates under the assumption that the file is properly structured.
+    ## parse(file_path, sort: true)
+    ## Lit un fichier YAML et retourne un Hash Ruby.
+    ## Hash trié par clé par défaut.
     ##
-    ## Params:
-    ## +file_path+:: Path to the YAML file that needs to be parsed.
-    ## +options+:: A hash of options where :sort controls whether the output should be sorted (default is true).
-    ##
-    ## Returns:
-    ## A hash representation of the YAML file or false if an error occurs.
+    ## Implémentation : on parcourt l'AST Psych plutôt que d'appeler
+    ## Psych.load. Cela permet de :
+    ##   - distinguer un scalaire plain "yes" d'un bool true
+    ##   - garder les valeurs problématiques (Norway) en String
+    ##   - décoder nous-mêmes les escapes \U... pour les blocs
+    ##     littéraux qui ne sont pas désescapés par Psych
     ##============================================================##
     def parse(file_path, **options)
       options = {:sort => true}.merge(options)
       begin
-        original_content = nil
         raise("File not found") if !File.exist?(file_path)
         ##============================================================##
-        ## Backup original content for restoration after parsing
+        ## Psych.parse_file retourne un Document. Si le fichier est
+        ## vide ou ne contient que des commentaires, root est nil.
         ##============================================================##
-        original_content = File.read(file_path)
+        doc = Psych.parse_file(file_path)
+        return {} if !doc || doc.root.nil?
-        ##============================================================##
-        ## clean the file
-        ##============================================================##
-        clean_yml(file_path)
-        ##============================================================##
-        ## parse the file & sort if necessary
-        ##============================================================##
-        parsed_xml = parse_xml(file_path)
-        parsed_xml = parsed_xml.sort_by_key if options[:sort]
-        ##============================================================##
-        ## Restore original content
-        ##============================================================##
-        File.write(file_path, original_content) if !original_content.nil?
+        result = node_to_value(doc.root, {})
         ##============================================================##
-        ## Return the parsed YAML file
+        ## On accepte tous les types racine (Hash, Array, scalaire),
+        ## mais on ne trie que si la racine est un Hash.
         ##============================================================##
-        parsed_xml
+        result = result.sort_by_key if options[:sort] && result.is_a?(Hash)
+        result
       rescue StandardError => e
-        ##============================================================##
-        ## Restore original content
-        ##============================================================##
-        File.write(file_path, original_content) if !original_content.nil?
         puts(e.message)
         puts(e.backtrace)
         false
@@ -156,26 +106,28 @@ module ImmosquareYaml
     end
     ##============================================================##
-    ## This method performs a dump operation to obtain a well-structured
-    ## YAML file from a hash input. It iterates through each key-value pair in the
-    ## hash and constructs a series of lines representing the YAML file, with
-    ## appropriate indentations and handling of various value types including
-    ## strings with newline characters.
-    ##
-    ## Params:
-    ## +hash+:: The input hash to be converted into a YAML representation.
-    ## +lines+:: An array to hold the constructed lines (default is an empty array).
-    ## +indent+:: The current indentation level (default is 0).
-    ##
-    ## Returns:
-    ## A string representing the YAML representation of the input hash.
+    ## dump(hash) → String YAML
+    ## Sérialise un Hash en YAML avec nos règles de formatage :
+    ##   - clés "yes/no/on/..." re-quotées
+    ##   - valeurs plain quand c'est sûr, sinon doublequotées
+    ##   - chaînes multi-lignes en bloc littéral | ou |-
+    ##   - arrays imbriqués délégués à Psych.dump puis indentés
     ##============================================================##
-    def dump(hash, lines = [], indent = 0)
+    def dump(hash)
+      render_hash(hash, [], 0)
+    end
+    private
+    ##============================================================##
+    ## Rendu récursif d'un Hash. Les paramètres lines et indent
+    ## sont des accumulateurs internes — exposés dans la signature
+    ## privée uniquement.
+    ##============================================================##
+    def render_hash(hash, lines, indent)
       hash.each do |key, value|
-        ##============================================================##
-        ## Preparing the key with the proper indentation before identifying
-        ## the type of the value to handle it appropriately in the YAML representation.
-        ##============================================================##
         line = "#{SPACE * indent}#{clean_key(key)}:"
         case value
@@ -184,10 +136,9 @@ module ImmosquareYaml
         when String
           if value.include?(NEWLINE) || value.include?('\n')
             ##============================================================##
-            ## We display the line with the key
-            ## then the indentation if necessary
-            ## then - if necessary (the + is not displayed because it is
-            ## the default behavior)
+            ## Bloc littéral. On ajoute "-" si la valeur ne se termine
+            ## pas par un newline (chomp). Indent indicator si la valeur
+            ## a des leading spaces sur ses lignes.
             ##============================================================##
             line        += "#{SPACE}|"
             indent_level = value[/\A */].size
@@ -196,25 +147,21 @@ module ImmosquareYaml
             lines << line
             ##============================================================##
-            ## Remove quotes surrounding the value if they are present.
-            ## They are not necessary in this case after | or |-
+            ## Décode les escapes \U0001F600 dans les blocs littéraux
+            ## (Psych ne les désescape pas pour LITERAL/FOLDED).
             ##============================================================##
-            value = value[1..-2] while (value.start_with?(DOUBLE_QUOTE) && value.end_with?(DOUBLE_QUOTE)) || (value.start_with?(SIMPLE_QUOTE) && value.end_with?(SIMPLE_QUOTE))
+            value = decode_unicode_escapes(value)
-            ##============================================================##
-            ## We parse on the 2 types of line breaks
-            ##============================================================##
             value.split(/\\n|\n/).each do |subline|
               lines << "#{SPACE * (indent + INDENT_SIZE)}#{subline}"
             end
           else
-            line += "#{SPACE}#{value}"
+            line += "#{SPACE}#{format_scalar_value(value)}"
             lines << line
           end
         when Hash
           lines << line
-          dump(value, lines, indent + INDENT_SIZE)
+          render_hash(value, lines, indent + INDENT_SIZE)
         when Array
           formated_value = Psych.dump(value)
           if formated_value == "--- []\n"
@@ -226,568 +173,179 @@ module ImmosquareYaml
             lines << line
             lines << formated_value
           end
+        else
+          ##============================================================##
+          ## Numbers, booleans, dates : laissés tels quels.
+          ##============================================================##
+          line += "#{SPACE}#{value}"
+          lines << line
         end
       end
-      ##============================================================##
-      ## Finalizing the construction by adding a newline at the end and
-      ## removing whitespace from empty lines.
-      ##============================================================##
       lines += [NOTHING]
       lines = lines.map {|l| l.strip.empty? ? NOTHING : l }
       lines.join("\n")
     end
-    private
     ##============================================================##
-    ## Deeply cleans the specified YAML file
+    ## Walker AST : transforme un Psych::Nodes::* en valeur Ruby.
+    ## Le hash anchors mémorise les ancres rencontrées pour
+    ## résoudre les aliases.
     ##============================================================##
-    def clean_yml(file_path)
-      lines             = []
-      inblock_indent    = nil
-      weirdblock_indent = nil
-      inblock           = false
-      weirdblock        = false
-      line_index        = 1
-      ##============================================================##
-      ## First, we normalize the file by ensuring it always ends with an empty line
-      ## This also allows us to get the total number of lines in the file,
-      ## helping us to determine when we are processing the last line
-      ##============================================================##
-      line_count = File.normalize_last_line(file_path)
-      File.foreach(file_path) do |current_line|
-        last_line = line_index == line_count
-        ##============================================================##
-        ## Cleaning the current line by removing multiple spaces occurring after a non-space character
-        ##============================================================##
-        current_line = current_line.to_s.gsub(/(?<=\S)\s+/, SPACE)
-        ##============================================================##
-        ## Trimming potential whitespace characters from the end of the line
-        ##============================================================##
-        current_line = current_line.rstrip
-        ##============================================================##
-        ## Detecting blank lines to specially handle the last line within a block;
-        ## if we are inside a block or it's the last line, we avoid skipping
-        ##============================================================##
-        blank_line = current_line.gsub(NEWLINE, NOTHING).empty?
-        next if !(last_line || inblock || !blank_line)
-        ##============================================================##
-        ## Identifying the indentation level of the current line
-        ##============================================================##
-        last_inblock                 = inblock
-        indent_level                 = current_line[/\A */].size
-        need_to_clean_prev_inblock   = inblock    == true && ((!blank_line && indent_level <= inblock_indent) || last_line)
-        need_to_clen_prev_weirdblock = weirdblock == true && (indent_level <= weirdblock_indent || last_line)
-        ##============================================================##
-        ## Handling the exit from a block:
-        ## if we are exiting a block, we clean the entire block
-        ##============================================================##
-        if need_to_clean_prev_inblock
-          inblock = false
-          ##============================================================##
-          ## Extracting the entire block by tracing back lines until we find a lesser indentation
-          ## Subsequently determining the type of block we are in and clean accordingly
-          ##============================================================##
-          i            = -1
-          block_indent = lines[i][/\A */].size
-          block_lines  = [lines[i].lstrip]
-          while lines[i][/\A */].size == lines[i - 1][/\A */].size
-            block_lines << lines[i - 1].lstrip
-            i -= 1
-          end
-          ##============================================================##
-          ## Handling different types of blocks (literal blocks "|",
-          ## folded blocks ">", etc.)
-          ## and applying the respective formatting strategies based on
-          ## block type and additional indent specified
-          ##
-          ## | => Literal blocks: It keeps line breaks as
-          ## that they are given in the text block.
-          ## Final new line: A new line is added to the
-          ## end of text.
-          ## |- => Literal blocks: It keeps line breaks as
-          ## that they are given in the text block.
-          ## New final line: The final line break is deleted,
-          ## unlike the option |
-          ## > Folded blocks: It replaces each new line with a space,
-          ## transforming the block of text into a single line.
-          ## However, it preserves newlines that follow an empty line.
-          ## Final new line: A new line is added at the end of the text.
-          ## We can also have |4- or |4+ to say with indentation 4
-          ##============================================================##
-          block_lines  = block_lines.reverse
-          block_type   = lines[i - 1].split(": ").last
-          indent_suppl = block_type.scan(/\d+/).first.to_i
-          indent_suppl = indent_suppl > 0 ? indent_suppl - INDENT_SIZE : 0
-          case block_type[0]
-          when  ">"
-            lines[i - 1] = lines[i - 1].gsub(">", "|")
-            lines[i]     = "#{SPACE * (block_indent + indent_suppl)}#{clean_value(block_lines.join(SPACE))}"
-            ((i + 1)..-1).to_a.size.times { lines.pop }
-          else
-            split = clean_value(block_lines.join(NEWLINE), false).split(NEWLINE)
-            (i..-1).each do |ii|
-              lines[ii] = "#{SPACE * (block_indent + indent_suppl)}#{split.shift}"
-            end
-          end
-        end
-        ##============================================================##
-        ## Handling 'weirdblocks': cases where multi-line values are enclosed in quotes,
-        ## which should actually be single-line values
-        ## key: "
-        ## line1
-        ## line2
-        ## line3"
-        ## key: '
-        ## line1
-        ## line2
-        ## line3'
-        ##============================================================##
-        if need_to_clen_prev_weirdblock
-          weirdblock  = false
-          key, value  = lines[-1].split(":", 2)
-          lines[-1]   = "#{key}: #{clean_value(value)}"
-        end
-        ##============================================================##
-        ## Handling keys without values: if the previous line ends with a colon (:) and is not
-        ## followed by a value, we assign 'null' as the value
-        ##============================================================##
-        if inblock == false && weirdblock == false && lines[-1] && lines[-1].end_with?(":") && last_inblock == false
-          prev_indent = lines[-1][/\A */].size
-          lines[-1] += " null" if prev_indent >= indent_level
-        end
-        ##============================================================##
-        ## Splitting the current line into key and value parts for further processing
-        ## You have to split on ":" and not on ": " because we don't have a space when it's
-        ## just a key.. but we have a newline
-        ## fr: => ["fr", "\n"]
-        ##============================================================##
-        split = inblock || weirdblock ? [current_line] : current_line.strip.split(":", 2)
-        key   = inblock || weirdblock ? nil : split[0].to_s.strip
-        ##============================================================##
-        ## Line processing based on various conditions such as being inside a block,
-        ## starting with a comment symbol (#), or being a part of a 'weirdblock'
-        ## Each case has its specific line cleaning strategy
-        ## If the line is commented out, we keep and we remove newlines
-        ##============================================================##
-        if current_line.lstrip.start_with?("#")
-          lines << current_line.gsub(NEWLINE, NOTHING)
-        ##============================================================##
-        ## If is in a block (multiline > | or |-), we clean
-        ## the line because it can start with spaces tabs etc.
-        ## and put it with the block indenter
-        ##============================================================##
-        elsif inblock == true
-          current_line = current_line.gsub(NEWLINE, NOTHING).strip
-          lines << "#{SPACE * (inblock_indent + INDENT_SIZE)}#{current_line}"
-        ##============================================================##
-        ## if the line ends with a multi-line character and we have a key.
-        ## we start a block
-        ## The regex works as follows:
-        ## \S+    : All non-space characters at the start of the line.
-        ## :      : Matches the string ": " literally (space included).
-        ## [>|]   : Matches a single character that is either ">" or "|".
-        ## (\d*)  : Capture group that matches zero or more digits (0-9).
-        ## [-+]?  : Matches zero or a character that is either "-" or "+".
-        ## $      : Matches the end of the line/string.
-        ##============================================================##
-        elsif current_line.rstrip.match?(/\S+: [>|](\d*)[-+]?$/)
-          lines << current_line.gsub(NEWLINE, NOTHING)
-          inblock_indent = indent_level
-          inblock        = true
-        ##============================================================##
-        ## We are in the scenario of a multiline block
-        ## but without > | or |- at the end of the line
-        ## which should actually be inline.
-        ## mykey:
-        ## line1
-        ## line2
-        ## line3
-        ## my key: line1 line2 line3
-        ##============================================================##
-        elsif split.size < 2
-          if current_line.lstrip.start_with?("-")
-            lines << current_line
-          else
-            lines[-1] = (lines[-1] + " #{current_line.lstrip}").gsub(NEWLINE, NOTHING)
-          end
-        ##============================================================##
-        ## Otherwise we are in the case of a classic line
-        ## key: value
-        ## or
-        ## key: without value
-        ## - key: value (list)
-        ## - key: without value (list)
-        ##============================================================##
-        else
-          key           = clean_key(key)
-          spaces        = (SPACE * indent_level).to_s
-          current_line  = "#{spaces}#{key}:"
-          if !split[1].empty?
-            value = split[1].to_s.strip
-            ##============================================================##
-            ## We are in a multiline block which should be an inline
-            ## if the value starts with a " and the number of " is odd
-            ##============================================================##
-            if (value.start_with?(DOUBLE_QUOTE) && value.count(DOUBLE_QUOTE).odd?) || (value.start_with?(SIMPLE_QUOTE) && value.count(SIMPLE_QUOTE).odd?)
-              weirdblock        = true
-              weirdblock_indent = indent_level
-            else
-              value = clean_value(split[1])
-            end
-            current_line += " #{value}"
-          end
+    def node_to_value(node, anchors)
+      case node
+      when Psych::Nodes::Scalar
+        value = scalar_to_ruby(node)
+        anchors[node.anchor] = value if node.anchor
+        value
+      when Psych::Nodes::Mapping
+        h = {}
+        node.children.each_slice(2) do |key_node, val_node|
           ##============================================================##
-          ## Merging the cleaned key and value to form the cleaned row
+          ## Toujours convertir les clés en String. Cela évite les
+          ## hashs aux types mixtes (Integer/String) qui cassent le tri
+          ## et déstabilisent les fichiers de traduction.
           ##============================================================##
-          lines << current_line
+          key    = node_to_value(key_node, anchors).to_s
+          h[key] = node_to_value(val_node, anchors)
         end
-        ##============================================================##
-        ## We increment the line number
-        ##============================================================##
-        line_index += 1
+        anchors[node.anchor] = h if node.anchor
+        h
+      when Psych::Nodes::Sequence
+        arr = node.children.map {|c| node_to_value(c, anchors) }
+        anchors[node.anchor] = arr if node.anchor
+        arr
+      when Psych::Nodes::Alias
+        raise("Unknown YAML alias: *#{node.anchor}") if !anchors.key?(node.anchor)
+        anchors[node.anchor]
+      else
+        raise("Unsupported YAML node type: #{node.class}")
       end
-      ##============================================================##
-      ## We finish the file with a newline and we delete
-      ## spaces on "empty" lines + double spaces
-      ## with the same technique as above
-      ##============================================================##
-      lines += [NOTHING]
-      lines = lines.map {|l| (l.strip.empty? ? NOTHING : l).to_s.gsub(/(?<=\S)\s+/, SPACE) }
-      File.write(file_path, lines.join(NEWLINE))
     end
     ##============================================================##
-    ## clean_key Function
-    ## Purpose: Clean up and standardize YAML keys
-    ## Strategy:
-    ## 1. Forcefully convert the key to a string to handle gsub operations, especially if it's an integer.
-    ## 2. Remove quotes if they are present.
-    ## 3. Check if the key is an integer.
-    ## 4. Re-add quotes if the key is a reserved word or an integer.
-    ##
-    ## This allows us to fetch the string without the surrounding quotes.
+    ## Convertit un Psych::Nodes::Scalar en valeur Ruby.
+    ## Règles :
+    ##   - quoted (single/double) → toujours String
+    ##   - plain vide ou null/~ → nil
+    ##   - plain "yes/no/on/off/true/false" → String (Norway problem)
+    ##   - plain entier → Integer
+    ##   - plain flottant → Float
+    ##   - sinon → String
+    ##   - LITERAL/FOLDED : String, mais on décode \U... à l'usage
+    ##     dans le dump (pas ici, pour ne pas perdre l'info brute)
     ##============================================================##
-    def clean_key(key)
-      ##============================================================##
-      ## Convert key to string to avoid issues with gsub operations
-      ##============================================================##
-      key = key.to_s
+    def scalar_to_ruby(node)
+      raw   = node.value
+      style = node.style
-      ##============================================================##
-      ## Remove surrounding quotes from the key
-      ##============================================================##
-      key = key[1..-2] if (key.start_with?(DOUBLE_QUOTE) && key.end_with?(DOUBLE_QUOTE)) || (key.start_with?(SIMPLE_QUOTE) && key.end_with?(SIMPLE_QUOTE))
+      return raw if [Psych::Nodes::Scalar::SINGLE_QUOTED, Psych::Nodes::Scalar::DOUBLE_QUOTED].include?(style)
+      return raw if [Psych::Nodes::Scalar::LITERAL, Psych::Nodes::Scalar::FOLDED].include?(style)
       ##============================================================##
-      ## Check if the key is an integer
+      ## Style PLAIN : on type prudemment.
       ##============================================================##
-      is_int = key =~ /\A[-+]?\d+\z/
+      return nil if raw == NOTHING || ["~", "null", "Null", "NULL"].include?(raw)
+      return raw if RESERVED_KEYS.include?(raw)
+      return raw.to_i if raw.match?(/\A-?\d+\z/)
+      return raw.to_f if raw.match?(/\A-?\d+\.\d+\z/)
-      ##============================================================##
-      ## Re-add quotes if the key is in the list of reserved keys or is an integer
-      ##============================================================##
-      key = "\"#{key}\"" if RESERVED_KEYS.include?(key) || is_int
-      key
+      raw
     end
     ##============================================================##
-    ## " [apple, orange, 'banana']" => [apple, orange, 'banana']
+    ## Decodes \UXXXXXXXX sequences (e.g. \U0001F600) into the matching UTF-8 character.
+    ## Called on string values at dump time (not at parse time,
+    ## in order to preserve idempotence if the user really has
+    ## the literal sequence in their YAML).
     ##============================================================##
-    def string_in_array(string)
-      begin
-        string_striped = string.strip
-        string_striped.match(/^\[.*\]$/) ? string_striped[1..-2].split(/,\s?/) : string
-      rescue StandardError
-        string
-      end
+    def decode_unicode_escapes(value)
+      value.gsub(/\\U([0-9A-Fa-f]{8})/) { [::Regexp.last_match(1).to_i(16)].pack("U*") } # backslash + U + exactly 8 hex digits -> the character at that code point
     end
     ##============================================================##
-    ## clean_value Function
-    ## Purpose: Sanitize and standardize YAML values
-    ## In YAML "inblock" scenarios, there's no need to add quotes
-    ## around values as it's inherently handled.
+    ## clean_key : prépare une clé pour le dump.
+    ## - retire les quotes englobantes éventuelles
+    ## - re-quote si la clé est un mot réservé YAML 1.1 ou un entier
     ##============================================================##
-    def clean_value(values, with_quotes_verif = true)
-      ##============================================================##
-      ## Convert key to array if not
-      ## fruits: [apple, orange, 'banana']
-      ## demo: "demo"
-      ##============================================================##
-      is_array = string_in_array(values)
-      values   = is_array.instance_of?(String) ? [values] : is_array
-      values = values.map do |value|
-        ##============================================================##
-        ## Convert value to string to prevent issues in subsequent operations
-        ##============================================================##
-        value = value.to_s
-        ##============================================================##
-        ## Remove newline characters at the end of the value if present.
-        ## This should be done prior to strip operation to handle scenarios
-        ## where the value ends with a space followed by a newline.
-        ##============================================================##
-        value = value[0..-2] if value.end_with?(NEWLINE)
-        ##============================================================##
-        ## Clean up the value:
-        ## - Remove tabs, carriage returns, form feeds, and vertical tabs.
-        ## \t: corresponds to a tab
-        ## \r: corresponds to a carriage return
-        ## \f: corresponds to a form feed
-        ## \v: corresponds to a vertical tab
-        ## We keep the \n
-        ##============================================================##
-        value = value.gsub(/[\t\r\f\v]+/, NOTHING)
-        ##============================================================##
-        ## Replace multiple spaces with a single space.
-        ##============================================================##
-        value = value.gsub(/ {2,}/, SPACE)
-        ##============================================================##
-        ## Trim leading and trailing spaces.
-        ##============================================================##
-        value = value.strip
-        ##============================================================##
-        ## Replace special quotes with standard single quotes.
-        ##============================================================##
-        value = value.gsub(WEIRD_QUOTES_REGEX, SIMPLE_QUOTE)
-        ##============================================================##
-        ## Remove all quotes surrounding the value if they are present.
-        ## They will be re-added later if necessary.
-        ## """"value"""" => value
-        ##============================================================##
-        value = value[1..-2] while (value.start_with?(DOUBLE_QUOTE) && value.end_with?(DOUBLE_QUOTE)) || (value.start_with?(SIMPLE_QUOTE) && value.end_with?(SIMPLE_QUOTE))
-        ##============================================================##
-        ## Convert emoji representations such as \U0001F600 to their respective emojis.
-        ##============================================================##
-        value = value.gsub(/\\U([0-9A-Fa-f]{8})/) { [::Regexp.last_match(1).to_i(16)].pack("U*") }
-        ##============================================================##
-        ## Handling cases where the value must be surrounded by quotes
-        ## if:
-        ## management of "" and " ". Not possible to have more spaces
-        ## because we have already removed the double spaces
-        ## else
-        ## value.include?(": ")                   => key: text with: here
-        ## value.include?(" #")                   => key: text with # here
-        ## value.include?(NEWLINE)                => key: Line 1\nLine 2\nLine 3
-        ## value.include?('\n')                   => key: Line 1"\n"Line 2"\n"Line 3
-        ## value.start_with?(*YML_SPECIAL_CHARS)  => key: @text
-        ## value.end_with?(":")                   => key: text:
-        ## RESERVED_KEYS.include?(value)          => key: YES
-        ## value.start_with?(SPACE)               => key: 'text'
-        ## value.end_with?(SPACE)                 => key: text '
-        ##============================================================##
-        if value.empty?
-          value = "\"#{value}\""
-        elsif with_quotes_verif == true
-          value = "\"#{value}\"" if value.include?(": ") ||
-                                    value.include?(" #") ||
-                                    value.include?(NEWLINE) ||
-                                    value.include?('\n') ||
-                                    value.start_with?(*YML_SPECIAL_CHARS) ||
-                                    value.end_with?(":") ||
-                                    (is_array ? false : RESERVED_KEYS.include?(value)) ||
-                                    value.start_with?(SPACE) ||
-                                    value.end_with?(SPACE)
-        end
-        ##============================================================##
-        ## Final clean to prevent
-        ## "yes": YES
-        ## "no": NO
-        ##============================================================##
-        value = "\"#{value}\"" if RESERVED_KEYS.include?(value)
-        ##============================================================##
-        ## Return the cleaned value
-        ##============================================================##
-        value
-      end
-      is_array.instance_of?(String) ? values.first : "[#{values.join(", ")}]"
-    end
-    ##============================================================##
-    ## Normalize indentation for array values without intent
-    ## for the first level.
-    ##============================================================##
-    def normalize_indentation(lines)
-      initial_indentation = lines.first.match(/^(\s*)/)[1].length
-      lines.map do |line|
-        line[initial_indentation..(line.end_with?(NEWLINE) ? -2 : -1)]
-      end
+    def clean_key(key) # Prepares a key for the dump and returns it as a String.
+      key    = strip_wrapping_quotes(key.to_s) # to_s first, so non-String keys are accepted
+      is_int = key.match?(/\A[-+]?\d+\z/) # optional sign followed by digits only
+      key    = "\"#{key}\"" if RESERVED_KEYS.include?(key) || is_int # reserved words and integer-looking keys are wrapped in double quotes
+      key
     end
     ##============================================================##
-    ## parse_xml Function
-    ## Purpose: Parse an XML file into a nested hash representation.
+    ## format_scalar_value: prepares a String value for the dump.
+    ## Decodes Unicode escapes and decides whether the value must be quoted.
     ##
-    ## This method reads through the XML file line by line and creates a
-    ## nested hash representation based on the structure and content of the XML.
+    ## The value is quoted when it is empty or contains characters that would
+    ## carry a special YAML meaning in plain style (": ", " #", leading
+    ## special character, trailing ":", reserved word, leading or trailing space).
     ##============================================================##
-    def parse_xml(file_path)
-      nested_hash = {}
-      inblock     = nil
-      inlist      = nil
-      inlist_data = nil
-      last_keys   = []
+    def format_scalar_value(value)
+      value = value.to_s
+      value = decode_unicode_escapes(value)
+      value = value.gsub(WEIRD_QUOTES_REGEX, SIMPLE_QUOTE)
       ##============================================================##
-      ## We go over each line of the file to create a hash.
-      ## We put the multiline blocks in an array to recover
-      ## all the values and the formatting type then we will pass
-      ## on each of these arrays subsequently to transform them
-      ## in the corresponding string
+      ## Strip any stray wrapping quotes (case of legacy files
+      ## produced by the old version).
       ##============================================================##
-      File.foreach(file_path) do |line|
-        ##============================================================##
-        ## Determine the indentation level of the line.
-        ##============================================================##
-        indent_level = line[/\A */].size
-        ##============================================================##
-        ## Check for blank lines (which can be present within multi-line blocks)
-        ##============================================================##
-        blank_line = line.gsub(NEWLINE, NOTHING).empty?
-        ##============================================================##
-        ## Split the line into key and value.
-        ##============================================================##
-        split   = line.strip.split(":", 2)
-        key     = split[0].to_s.strip
-        inblock = nil if !inblock.nil? && !blank_line && indent_level <= inblock
-        ##============================================================##
-        ## inlist Enter
-        ##============================================================##
-        if inlist.nil? && !blank_line && line.strip.start_with?("-") && inblock.nil?
-          inlist      = indent_level
-          inlist_data = []
-        end
-        ##============================================================##
-        ## inlist Exit
-        ## We use Pscyh to parse the yaml of the list content
-        ##============================================================##
-        if !inlist.nil? && !blank_line && indent_level < inlist
-          yaml                = normalize_indentation(inlist_data).join(NEWLINE)
-          current_key         = last_keys.last
-          parent_keys         = last_keys[0..-2]
-          result              = parent_keys.reduce(nested_hash) {|hash, k| hash[k] }
-          result[current_key] = Psych.safe_load(yaml, :permitted_classes => [Date])
-          inlist              = nil
-          inlist_data         = []
-        end
-        ##============================================================##
-        ## Set the key level based on indentation
-        ##============================================================##
-        last_keys = last_keys[0, (blank_line ? inblock + INDENT_SIZE : indent_level) / INDENT_SIZE]
+      value = strip_wrapping_quotes(value)
+      ##============================================================##
+      ## Note: a " in the middle of a plain string is legal YAML.
+      ## We only quote when the " is at the start (per the original author this
+      ## is covered by start_with?(*YML_SPECIAL_CHARS), defined elsewhere). Quoting
+      ## whenever a " appears anywhere in the value would produce needless diffs.
+      ##============================================================##
+      need_quotes = value.empty? ||
+                    value.include?(": ") ||
+                    value.include?(" #") ||
+                    value.start_with?(*YML_SPECIAL_CHARS) ||
+                    value.end_with?(":") ||
+                    RESERVED_KEYS.include?(value) ||
+                    value.start_with?(SPACE) ||
+                    value.end_with?(SPACE)
-        ##============================================================##
-        ## If inside a multi-line block, append the line to the current key's value
-        ##============================================================##
-        if !inblock.nil?
-          current_key           = last_keys.last
-          parent_keys           = last_keys[0..-2]
-          result                = parent_keys.reduce(nested_hash) {|hash, k| hash[k] }
-          result[current_key][1] << line.strip
-        ##============================================================##
-        ## Handle list declarations.
-        ##============================================================##
-        elsif !inlist.nil?
-          inlist_data << line
-        ##============================================================##
-        ## Handle multi-line key declarations.
-        ## We no longer have the >
-        ## because it is transformed in the clean_xml into |
-        ##============================================================##
-        elsif line.gsub("#{key}:", NOTHING).strip.start_with?("|")
-          inblock     = indent_level
-          block_type  = line.gsub("#{key}:", NOTHING).strip
-          result      = last_keys.reduce(nested_hash) {|hash, k| hash[k] }
-          result[key] = ["#{CUSTOM_SEPARATOR}#{block_type}#{CUSTOM_SEPARATOR}", []]
-          last_keys << key
-        ##============================================================##
-        ## Handle regular key-value pair declarations
-        ##============================================================##
-        else
-          value  = split[1].to_s.strip
-          result = last_keys.reduce(nested_hash) {|hash, k| hash[k] }
-          if value.empty?
-            result[key] = {}
-            last_keys << key
-          else
-            result[key] = value.strip == "null" ? nil : string_in_array(value)
-          end
-        end
-      end
+      return value if !need_quotes # NOTE(review): numeric- or null-looking strings are returned unquoted unless RESERVED_KEYS / YML_SPECIAL_CHARS cover them — confirm
       ##============================================================##
-      ## We go over each value then we process if it is a has
-      ## | with final newline
-      ## |4 with newline and indentation of 4
-      ## |- without newline
-      ## |4- without newline and indentation of 4
+      ## Choice of quoting style:
+      ##   - single-quoted by default (lighter, no escapes)
+      ##   - double-quoted only when the value contains an
+      ##     apostrophe or a character that needs an escape
+      ##     (\, tab). This minimizes git diffs on existing
+      ##     files and improves readability (HTML in particular).
       ##============================================================##
-      deep_transform_values(nested_hash) do |value|
-        if value.is_a?(Array) && !value[0].nil? && value[0].instance_of?(String) && value[0].start_with?(CUSTOM_SEPARATOR) && value[0].end_with?(CUSTOM_SEPARATOR)
-          style_type   = value[0].gsub(CUSTOM_SEPARATOR, NOTHING)
-          indent_supp  = style_type.scan(/\d+/).first.to_i
-          indent_supp  = [indent_supp - INDENT_SIZE, 0].max
-          value[1]     = value[1].map {|l| "#{SPACE * indent_supp}#{l}" }
-          text         = value[1].join(NEWLINE)
-          modifier     = style_type[-1]
-          case modifier
-          when "+"
-            text << NEWLINE unless text.end_with?(NEWLINE)
-          when "-"
-            text.chomp!
-          else
-            text << NEWLINE unless text.end_with?(NEWLINE)
-          end
-          text
-        else
-          value
-        end
+      if value.include?(SIMPLE_QUOTE) || value.include?("\\") || value.include?("\t")
+        yaml_double_quote(value)
+      else
+        "#{SIMPLE_QUOTE}#{value}#{SIMPLE_QUOTE}"
       end
     end
+    ##============================================================##
+    ## Escapes a string so it can be serialized as a YAML double-quoted scalar.
+    ## Handles \, " and tab only; newlines are NOT escaped here. Real newlines are
+    ## said not to reach this method (rendered as a literal block by the caller — not visible here).
+    ##============================================================##
+    def yaml_double_quote(value)
+      escaped = value.gsub("\\", "\\\\\\\\").gsub("\"", '\\"').gsub("\t", '\\t') # the 4-backslash replacement collapses to 2 inside gsub, so each \ becomes \\
+      "\"#{escaped}\""
+    end
+    ##============================================================##
+    ## Repeatedly strips pairs of wrapping quotes (a loop, not recursion).
+    ## Used for legacy files produced by v0.1.28, which could
+    ## contain values with the quotes included.
+    ##============================================================##
+    def strip_wrapping_quotes(value)
+      value = value[1..-2] while (value.start_with?(DOUBLE_QUOTE) && value.end_with?(DOUBLE_QUOTE)) || (value.start_with?(SIMPLE_QUOTE) && value.end_with?(SIMPLE_QUOTE)) # NOTE(review): only the first and last characters are checked, so a value like 'a' or 'b' also loses its outer quotes
+      value
+    end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: immosquare-yaml
 version: !ruby/object:Gem::Version
-  version: 0.1.28
+  version: 1.0.0
 platform: ruby
 authors:
 - immosquare
@@ -62,7 +62,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.7.1
+rubygems_version: 4.0.11
 specification_version: 4
 summary: A YAML parser optimized for translation files.
 test_files: []