immosquare-yaml 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/immosquare-yaml.rb +698 -0
  3. data/lib/version.rb +5 -0
  4. metadata +61 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c14b630cc0f5e121db85ea66b43d18c0d59ae69e2f99db80bdeaf7077b35c5ca
4
+ data.tar.gz: 06073e555050eb9d7971135fcdab6b6ec1d00f29f283c5ab68153c5306a0c8d9
5
+ SHA512:
6
+ metadata.gz: da0e7f956a8cf9e1f5a2bac7d03114961d2b2d2b31d41c40b54fe17d853102bd97012cca89d7425c80fd1b04498a847c10641e65690223ea34aec9fa5c1b5798
7
+ data.tar.gz: 463d134187834be1649dddf650615f440712282d07448b9254de0d57b4c8d64419091a114bc2c4d23630a4cb3c28408202ebc34e7d1b91a30cf6a7ef78e2975e
@@ -0,0 +1,698 @@
1
+ module ImmosquareYaml
2
+ class << self
3
+
4
##============================================================##
## Formatting primitives shared by the cleaner, parser and dumper
##============================================================##
INDENT_SIZE         = 2
SPACE               = " ".freeze
NEWLINE             = "\n".freeze
SIMPLE_QUOTE        = "'".freeze
DOUBLE_QUOTE        = '"'.freeze
DOUBLE_SIMPLE_QUOTE = "''".freeze

##============================================================##
## Typographic quotes and the doubled single quote; every match
## is replaced by a plain single quote in clean_value
##============================================================##
WEIRD_QUOTES_REGEX = /‘|’|“|”|‛|‚|„|‟|#{Regexp.quote(DOUBLE_SIMPLE_QUOTE)}/.freeze

##============================================================##
## A value starting with one of these characters gets quoted
##============================================================##
YML_SPECIAL_CHARS = [
  "-", "`", "{", "}", "|",
  "[", "]", ">", ":", "\"",
  "'", "*", "=", "%", ",",
  "!", "?", "&", "#", "@"
].freeze

##============================================================##
## Boolean-like words in lower, capitalized and upper case;
## keys and values equal to one of them get quoted
##============================================================##
RESERVED_KEYS = [:itself, :capitalize, :upcase].flat_map do |variant|
  ["yes", "no", "on", "off", "true", "false"].map(&variant)
end.freeze
17
+
18
+ ##===========================================================================##
19
+ ## This method cleans a specified YAML file by processing it line by line.
20
+ ## It executes a comprehensive cleaning routine, which involves parsing the
21
+ ## YAML content to a hash, optionally sorting it, and then dumping it back
22
+ ## to a YAML format.
23
+ ##
24
+ ## Params:
25
+ ## +file_path+:: Path to the YAML file that needs to be cleaned.
26
+ ## +options+:: A hash of options where :sort controls whether the output should be sorted (default is true).
27
+ ##
28
+ ## Returns:
29
+ ## Boolean indicating the success (true) or failure (false) of the operation.
30
+ ##===========================================================================##
31
##============================================================##
## Cleans a YAML file in place: normalizes it, parses it into a
## hash, optionally sorts the keys, then writes the dumped
## result back to the same path.
##
## Params:
## +file_path+:: Path to the YAML file to clean.
## +options+:: :sort (default true) sorts the keys recursively
##             before writing; :sort => false keeps file order.
##
## Returns:
## true on success, false on any failure (the error message is
## printed to stdout).
##============================================================##
def clean(file_path, **options)
  options = {:sort => true}.merge(options)

  begin
    raise("File not found") if !File.exist?(file_path)

    ##============================================================##
    ## parse runs clean_yml itself; this explicit first pass is kept
    ## so the file still goes through two normalization passes.
    ##============================================================##
    clean_yml(file_path)

    ##============================================================##
    ## The :sort option is forwarded: parse sorts by default, so
    ## calling it without options made :sort => false a no-op.
    ##============================================================##
    yaml_final = parse(file_path, :sort => options[:sort])

    ##============================================================##
    ## parse reports failure by returning false (it has already
    ## printed its own message); stop before touching the file.
    ##============================================================##
    raise("Unable to parse #{file_path}") if !yaml_final

    File.write(file_path, dump(yaml_final))
    true
  rescue StandardError => e
    puts(e.message)
    false
  end
end
56
+
57
+ ##==========================================================================##
58
+ ## This method parses a specified YAML file, carrying out a preliminary
59
+ ## cleaning operation to ensure a smooth parsing process. Following this,
60
+ ## the cleaned file is transformed into a hash, which can optionally be sorted.
61
+ ## It operates under the assumption that the file is properly structured.
62
+ ##
63
+ ## Params:
64
+ ## +file_path+:: Path to the YAML file that needs to be parsed.
65
+ ## +options+:: A hash of options where :sort controls whether the output should be sorted (default is true).
66
+ ##
67
+ ## Returns:
68
+ ## A hash representation of the YAML file or false if an error occurs.
69
+ ##==========================================================================##
70
##============================================================##
## Normalizes the YAML file at +file_path+ (the file is rewritten
## by clean_yml), then reads it into a nested hash.
##
## Params:
## +file_path+:: Path to the YAML file to parse.
## +options+:: :sort (default true) sorts the keys recursively.
##
## Returns:
## The parsed hash, or false when an error occurred (the error
## message is printed to stdout).
##============================================================##
def parse(file_path, **options)
  settings = {:sort => true}.merge(options)

  raise("File not found") unless File.exist?(file_path)

  clean_yml(file_path)
  parsed = parse_xml(file_path)
  settings[:sort] ? sort_by_key(parsed, settings[:sort]) : parsed
rescue StandardError => e
  puts(e.message)
  false
end
85
+
86
+ ##===========================================================================##
87
+ ## This method performs a dump operation to obtain a well-structured
88
+ ## YAML file from a hash input. It iterates through each key-value pair in the
89
+ ## hash and constructs a series of lines representing the YAML file, with
90
+ ## appropriate indentations and handling of various value types including
91
+ ## strings with newline characters.
92
+ ##
93
+ ## Params:
94
+ ## +hash+:: The input hash to be converted into a YAML representation.
95
+ ## +lines+:: An array to hold the constructed lines (default is an empty array).
96
+ ## +indent+:: The current indentation level (default is 0).
97
+ ##
98
+ ## Returns:
99
+ ## A string representing the YAML representation of the input hash.
100
+ ##===========================================================================##
101
##============================================================##
## Serializes a (possibly nested) hash into YAML text.
##
## Params:
## +hash+:: The hash to serialize.
## +lines+:: Accumulator of output lines, shared with the
##           recursive calls (default is an empty array).
## +indent+:: Current indentation in spaces (default is 0).
##
## Returns:
## The YAML text, terminated by a newline when at least one
## line was produced.
##============================================================##
def dump(hash, lines = [], indent = 0)
  hash.each do |key, value|
    line = "#{SPACE * indent}#{clean_key(key)}:"

    case value
    when nil
      lines << "#{line} null"
    when Hash
      ##============================================================##
      ## Nested hash: emit the bare key, then let the recursive call
      ## append the children to the same accumulator.
      ##============================================================##
      lines << line
      dump(value, lines, indent + INDENT_SIZE)
    when String
      if value.include?(NEWLINE) || value.include?('\n')
        ##=============================================================##
        ## Multi-line string (real or escaped line breaks): literal
        ## block. An indentation indicator is added when the text
        ## starts with spaces, and "-" when it has no final newline.
        ##=============================================================##
        header = "#{line}#{SPACE}|"
        indent_level = value[/\A */].size
        header += (indent_level + INDENT_SIZE).to_s if indent_level > 0
        header += "-" if !value.end_with?(NEWLINE)
        lines << header

        value.split(/\\n|\n/).each do |subline|
          lines << "#{SPACE * (indent + INDENT_SIZE)}#{subline}"
        end
      else
        lines << "#{line}#{SPACE}#{value}"
      end
    else
      ##============================================================##
      ## Any other value (Integer, Float, true/false, Symbol, ...) is
      ## written with its string form instead of being dropped.
      ##============================================================##
      lines << "#{line}#{SPACE}#{value}"
    end
  end

  ##============================================================##
  ## Final newline + whitespace-only lines reduced to empty lines.
  ## A new array is built so the caller's accumulator is untouched.
  ##============================================================##
  (lines + [""]).map {|l| l.strip.empty? ? "" : l }.join("\n")
end
150
+
151
+
152
+ private
153
+
154
+ ##===========================================================================##
155
+ ## This method ensures the file ends with a single newline, facilitating
156
+ ## cleaner multi-line blocks. It operates by reading all lines of the file,
157
+ ## removing any empty lines at the end, and then appending a newline.
158
+ ## This guarantees the presence of a newline at the end, and also prevents
159
+ ## multiple newlines from being present at the end.
160
+ ##
161
+ ## Params:
162
+ ## +file_path+:: The path to the file to be normalized.
163
+ ##
164
+ ## Returns:
165
+ ## The total number of lines in the normalized file.
166
+ ##===========================================================================##
167
##============================================================##
## Rewrites the file so that it ends with exactly one empty
## line: trailing blank lines are removed, then a single line
## holding only a newline is appended.
##
## Params:
## +file_path+:: The path to the file to normalize.
##
## Returns:
## The number of lines of the rewritten file (the appended
## empty line included).
##============================================================##
def normalize_last_line(file_path)
  lines = File.read(file_path).lines

  ##============================================================##
  ## Terminate the last line if needed. An empty file has no last
  ## line, so it is guarded to avoid calling end_with? on nil.
  ##============================================================##
  lines[-1] = "#{lines[-1]}#{NEWLINE}" if !lines.empty? && !lines[-1].end_with?(NEWLINE)

  ##============================================================##
  ## Drop every trailing blank line, then add back exactly one
  ##============================================================##
  lines.pop while lines.last && lines.last.strip.empty?
  lines += [NEWLINE]

  File.write(file_path, lines.join)
  lines.size
end
199
+
200
+ ##============================================================##
201
+ ## Deeply cleans the specified YAML file
202
+ ##============================================================##
203
##============================================================##
## Normalizes the YAML file at +file_path+ line by line and
## rewrites it in place: keys and values are cleaned, keys
## without value get "null", folded blocks (>) are turned into
## literal blocks (|), and quoted values spread over several
## lines are merged back on one line.
##
## Params:
## +file_path+:: Path to the YAML file to normalize.
##============================================================##
def clean_yml(file_path)
  lines             = []
  inblock_indent    = nil
  weirdblock_indent = nil
  inblock           = false
  weirdblock        = false

  ##===================================================================================#
  ## The file is first normalized so that it always ends with an empty line.
  ## The returned line count is used to detect the last line.
  ##===================================================================================#
  line_count = normalize_last_line(file_path)

  ##===================================================================================#
  ## The line number comes from with_index, so it advances for every line read,
  ## including the blank lines skipped below. With a counter incremented at the
  ## end of the loop body, each skipped line made it lag and last_line was never
  ## true for files containing blank lines.
  ##===================================================================================#
  File.foreach(file_path).with_index(1) do |current_line, line_index|
    last_line = line_index == line_count

    ##===================================================================================#
    ## Whitespace runs following a non-space character are reduced to one space,
    ## then trailing whitespace (newline included) is removed
    ##===================================================================================#
    current_line = current_line.to_s.gsub(/(?<=\S)\s+/, SPACE)
    current_line = current_line.rstrip

    ##===================================================================================#
    ## Blank lines are skipped unless we are inside a block or on the last line
    ##===================================================================================#
    blank_line = current_line.gsub(NEWLINE, "").empty?
    next if !(last_line || inblock || !blank_line)

    ##============================================================##
    ## Indentation of the current line + do we leave a block?
    ##============================================================##
    last_inblock                  = inblock
    indent_level                  = current_line[/\A */].size
    need_to_clean_prev_inblock    = inblock == true && ((!blank_line && indent_level <= inblock_indent) || last_line)
    need_to_clean_prev_weirdblock = weirdblock == true && (indent_level <= weirdblock_indent || last_line)

    ##===================================================================================#
    ## Leaving a block: the whole block is cleaned
    ##===================================================================================#
    if need_to_clean_prev_inblock
      inblock = false

      ##============================================================##
      ## Collect the block by walking back while the indentation
      ## stays the same; lines[i - 1] ends up on the block header
      ##============================================================##
      i            = -1
      block_indent = lines[i][/\A */].size
      block_lines  = [lines[i].lstrip]
      while lines[i][/\A */].size == lines[i - 1][/\A */].size
        block_lines << lines[i - 1].lstrip
        i -= 1
      end

      ##============================================================##
      ## block_type is what follows "key: " on the header line
      ##   |   literal block, line breaks kept, final newline
      ##   |-  literal block, line breaks kept, no final newline
      ##   >   folded block, line breaks replaced by spaces
      ## An indentation can be given (|4- or |4+ for example)
      ##============================================================##
      block_lines  = block_lines.reverse
      block_type   = lines[i - 1].split(": ").last
      indent_suppl = block_type.scan(/\d+/).first.to_i
      indent_suppl = indent_suppl > 0 ? indent_suppl - INDENT_SIZE : 0
      case block_type[0]
      when ">"
        ##============================================================##
        ## Folded block: the header becomes "|" and the content is
        ## joined on a single line; the other lines are removed
        ##============================================================##
        lines[i - 1] = lines[i - 1].gsub(">", "|")
        lines[i]     = "#{SPACE * (block_indent + indent_suppl)}#{clean_value(block_lines.join(SPACE))}"
        ((i + 1)..-1).to_a.size.times { lines.pop }
      else
        ##============================================================##
        ## Literal block: cleaned as a whole without quote handling,
        ## then written back line by line
        ##============================================================##
        split = clean_value(block_lines.join(NEWLINE), false).split(NEWLINE)
        (i..-1).each do |ii|
          lines[ii] = "#{SPACE * (block_indent + indent_suppl)}#{split.shift}"
        end
      end
    end

    ##===================================================================================#
    ## Leaving a 'weirdblock': a quoted value spread over several lines that
    ## should be a single-line value
    ## key: "
    ##   line1
    ##   line2"
    ## The lines were merged on the key line; its value is cleaned now
    ##===================================================================================#
    if need_to_clean_prev_weirdblock
      weirdblock = false
      key, value = lines[-1].split(":", 2)
      lines[-1]  = "#{key}: #{clean_value(value)}"
    end

    ##===================================================================================#
    ## Key without value: the previous line ends with ":" and the current line
    ## is not indented deeper, so the previous key gets "null"
    ##===================================================================================#
    if inblock == false && weirdblock == false && lines[-1] && lines[-1].end_with?(":") && last_inblock == false
      prev_indent = lines[-1][/\A */].size
      lines[-1] += " null" if prev_indent >= indent_level
    end

    ##============================================================##
    ## Split on ":" and not ": ", a bare key has no space after it
    ##============================================================##
    split = inblock || weirdblock ? [current_line] : current_line.strip.split(":", 2)
    key   = inblock || weirdblock ? nil : split[0].to_s.strip

    if current_line.lstrip.start_with?("#")
      ##============================================================##
      ## Comment line: kept as it is
      ##============================================================##
      lines << current_line.gsub(NEWLINE, "")
    elsif inblock == true
      ##============================================================##
      ## Inside a block: the line is re-indented under the block key
      ##============================================================##
      current_line = current_line.gsub(NEWLINE, "").strip
      lines << "#{SPACE * (inblock_indent + INDENT_SIZE)}#{current_line}"
    elsif current_line.rstrip.match?(/\S+: [>|](\d*)[-+]?$/)
      ##============================================================##
      ## "key: >" or "key: |" with an optional indentation number
      ## and an optional - or +: a block starts here
      ##============================================================##
      lines << current_line.gsub(NEWLINE, "")
      inblock_indent = indent_level
      inblock        = true
    elsif split.size < 2
      ##============================================================##
      ## Line without ":" (continuation of a value written on several
      ## lines, or a weirdblock line): appended to the previous line
      ##============================================================##
      lines[-1] = (lines[-1] + " #{current_line.lstrip}").gsub(NEWLINE, "")
    else
      ##============================================================##
      ## Classic line: "key: value" or "key:" without value
      ##============================================================##
      key          = clean_key(key)
      spaces       = (SPACE * indent_level).to_s
      current_line = "#{spaces}#{key}:"

      if !split[1].empty?
        value = split[1].to_s.strip

        ##============================================================##
        ## A value starting with a quote and holding an odd number of
        ## that quote is not closed on this line: a weirdblock starts
        ##============================================================##
        if (value.start_with?(DOUBLE_QUOTE) && value.count(DOUBLE_QUOTE).odd?) || (value.start_with?(SIMPLE_QUOTE) && value.count(SIMPLE_QUOTE).odd?)
          weirdblock        = true
          weirdblock_indent = indent_level
        else
          value = clean_value(split[1])
        end
        current_line += " #{value}"
      end

      lines << current_line
    end
  end

  ##============================================================##
  ## The file ends with a newline; whitespace-only lines become
  ## empty and whitespace runs are reduced to one space again
  ##============================================================##
  lines += [""]
  lines = lines.map {|l| (l.strip.empty? ? "" : l).to_s.gsub(/(?<=\S)\s+/, SPACE) }
  File.write(file_path, lines.join(NEWLINE))
end
431
+
432
+ ##============================================================##
433
+ ## clean_key Function
434
+ ## Purpose: Clean up and standardize YAML keys
435
+ ##============================================================##
436
+ ## Strategy:
437
+ ## 1. Forcefully convert the key to a string to handle gsub operations, especially if it's an integer.
438
+ ## 2. Check if the key is an integer.
439
+ ## 3. Remove quotes if they are present.
440
+ ## 4. Re-add quotes if the key is a reserved word or an integer.
441
+ #
442
+ ## Regular Expression Explanation:
443
+ ## /\A(['“‘”’"])(.*)\1\z/
444
+ ## \A: Matches the start of the string, ensuring our pattern begins at the very start of the string.
445
+ ## (['“‘”’"]): Captures a single quote character. It matches any of the characters specified within the brackets.
446
+ ## This includes various types of single and double quotes.
447
+ ## (.*) : Captures zero or more of any character. It "captures" the entirety of the string between the quotes.
448
+ ## \1: Refers back to the first captured group, ensuring the same type of quote character is found at the end.
449
+ ## \z: Matches the end of the string, ensuring our pattern matches up to the very end.
450
+ #
451
+ ## In the second argument of gsub, we use '\2' to refer back to the content captured by the second capture group.
452
+ ## This allows us to fetch the string without the surrounding quotes.
453
+ ##============================================================##
454
##============================================================##
## Standardizes a YAML key.
##
## Params:
## +key+:: The key (any object, converted with to_s).
##
## Returns:
## The key without its surrounding quotes, wrapped in double
## quotes when it is a reserved word or an integer.
##============================================================##
def clean_key(key)
  ##============================================================##
  ## Surrounding quotes are removed first, so that a key which
  ## was already quoted ("42", 'yes') is recognized below and
  ## quoted again instead of losing its quotes.
  ## \1 requires the closing quote to be the same character as
  ## the opening one.
  ##============================================================##
  key = key.to_s.gsub(/\A(['“”‘’"]?)(.*)\1\z/, '\2')

  ##============================================================##
  ## Array#include? is used rather than Object#in?, which is only
  ## available when ActiveSupport is loaded.
  ##============================================================##
  needs_quotes = RESERVED_KEYS.include?(key) || key.match?(/\A[-+]?\d+\z/)
  needs_quotes ? "\"#{key}\"" : key
end
470
+
471
+ ##============================================================##
472
+ ## clean_value Function
473
+ ## Purpose: Sanitize and standardize YAML values
474
+ ## In YAML "inblock" scenarios, there's no need to add quotes
475
+ ## around values as it's inherently handled.
476
+ ## ============================================================ ##
477
##============================================================##
## Sanitizes a YAML value.
##
## Params:
## +value+:: The raw value (any object, converted with to_s).
## +with_quotes_verif+:: When true (default) the value is wrapped
##                       in double quotes if it needs them. Pass
##                       false for block content, where quotes
##                       are not needed.
##
## Returns:
## The cleaned value. A blank value is always returned quoted.
##============================================================##
def clean_value(value, with_quotes_verif = true)
  value = value.to_s

  ##============================================================##
  ## Remove one final newline before the strip below, to handle a
  ## value ending with a space followed by a newline
  ##============================================================##
  value = value[0..-2] if value.end_with?(NEWLINE)

  ##============================================================##
  ## Remove tabs, carriage returns, form feeds and vertical tabs
  ## (\n is kept), reduce runs of spaces to one space, then trim
  ##============================================================##
  value = value.gsub(/[\t\r\f\v]+/, "")
  value = value.gsub(/ {2,}/, SPACE)
  value = value.strip

  ##============================================================##
  ## Replace special quotes with standard single quotes
  ##============================================================##
  value = value.gsub(WEIRD_QUOTES_REGEX, SIMPLE_QUOTE)

  ##============================================================##
  ## Remove the quotes surrounding the value; they are added back
  ## below when necessary
  ##============================================================##
  value = value[1..-2] if (value.start_with?(DOUBLE_QUOTE) && value.end_with?(DOUBLE_QUOTE)) || (value.start_with?(SIMPLE_QUOTE) && value.end_with?(SIMPLE_QUOTE))

  ##============================================================##
  ## Convert escapes such as \U0001F600 to the character itself
  ##============================================================##
  value = value.gsub(/\\U([0-9A-Fa-f]{8})/) { [::Regexp.last_match(1).to_i(16)].pack("U*") }

  ##=============================================================##
  ## Blank test (empty or whitespace only) written with a regex
  ## and RESERVED_KEYS.include? instead of present? / in?, which
  ## only exist when ActiveSupport is loaded.
  ##=============================================================##
  blank = value.match?(/\A[[:space:]]*\z/)

  ##=============================================================##
  ## The value must be surrounded by quotes when:
  ##   it contains ": "                   => key: text with: here
  ##   it contains " #"                   => key: text with # here
  ##   it contains a real or escaped \n   => key: Line 1\nLine 2
  ##   it starts with a special character => key: @text
  ##   it ends with ":"                   => key: text:
  ##   it is a reserved word              => key: YES
  ##   it starts or ends with a space
  ## A blank value ("" or " ") is quoted in every case
  ##=============================================================##
  needs_quotes = blank || (
    with_quotes_verif == true && (
      value.include?(": ") ||
      value.include?(" #") ||
      value.include?(NEWLINE) ||
      value.include?('\n') ||
      value.start_with?(*YML_SPECIAL_CHARS) ||
      value.end_with?(":") ||
      RESERVED_KEYS.include?(value) ||
      value.start_with?(SPACE) ||
      value.end_with?(SPACE)
    )
  )

  needs_quotes ? "\"#{value}\"" : value
end
561
+
562
+ ##============================================================##
563
+ ## Deep transform values recursively
564
+ ##============================================================##
565
##============================================================##
## Returns a new hash with the same keys where every value that
## is not a Hash has been replaced by the result of the block;
## nested hashes are processed the same way.
##============================================================##
def deep_transform_values(hash, &block)
  hash.each_with_object({}) do |(key, value), transformed|
    transformed[key] =
      if value.is_a?(Hash)
        deep_transform_values(value, &block)
      else
        block.call(value)
      end
  end
end
574
+
575
+ ##============================================================##
576
+ ## sort_by_key Function
577
+ ## Purpose: Sort a hash by its keys, optionally recursively, with
578
+ ## case-insensitive comparison and stripping of double quotes.
579
+ ## ============================================================ #
580
##============================================================##
## Returns a new hash whose keys are sorted.
##
## Params:
## +hash+:: The hash to sort.
## +recursive+:: When truthy, nested hashes are sorted as well
##               (default is false).
## +block+:: Optional comparator taking two keys. By default
##           keys are compared as lowercase strings with the
##           double quotes removed.
##============================================================##
def sort_by_key(hash, recursive = false, &block)
  comparator = block || proc {|a, b| a.to_s.downcase.gsub(DOUBLE_QUOTE, "") <=> b.to_s.downcase.gsub(DOUBLE_QUOTE, "") }

  sorted = {}
  hash.keys.sort(&comparator).each do |key|
    value       = hash[key]
    sorted[key] = recursive && value.is_a?(Hash) ? sort_by_key(value, true, &comparator) : value
  end
  sorted
end
587
+
588
+ ##============================================================##
589
+ ## parse_xml Function
590
+ ## Purpose: Parse a cleaned YAML file into a nested hash representation.
591
+ ##
592
+ ## This method reads through the YAML file line by line and creates a
593
+ ## nested hash representation based on the structure and content of the YAML.
594
+ ##============================================================##
595
##============================================================##
## Reads a YAML file (despite the method name, not XML) line by
## line into a nested hash. The nesting is derived from the
## indentation, INDENT_SIZE spaces per level.
##
## Params:
## +file_path+:: Path to the YAML file to read.
##
## Returns:
## A nested hash. "null" values become nil, keys without value
## become empty hashes and "|" blocks become multi-line strings.
##============================================================##
def parse_xml(file_path)
  nested_hash = {}
  inblock     = nil
  last_keys   = []

  ##============================================================##
  ## First pass: build the hash. A multi-line block is stored as
  ## [block_type, [lines]] and turned into a string afterwards.
  ##============================================================##
  File.foreach(file_path) do |line|
    indent_level = line[/\A */].size

    ##============================================================##
    ## Blank lines can be present within multi-line blocks
    ##============================================================##
    blank_line = line.gsub(NEWLINE, "").empty?

    split = line.strip.split(":", 2)
    key   = split[0].to_s.strip

    ##============================================================##
    ## A non-blank line which is not indented deeper than the block
    ## key ends the current block
    ##============================================================##
    inblock = nil if !inblock.nil? && !blank_line && indent_level <= inblock

    ##============================================================##
    ## Outside a block, empty lines and comment lines carry no data.
    ## They are skipped: a comment would otherwise be stored as a
    ## key, and a blank line would evaluate nil + INDENT_SIZE below.
    ##============================================================##
    next if inblock.nil? && (line.strip.empty? || line.lstrip.start_with?("#"))

    ##============================================================##
    ## Keep only the parent keys matching the indentation; a blank
    ## line inside a block counts as block content
    ##============================================================##
    last_keys = last_keys[0, (blank_line ? inblock + INDENT_SIZE : indent_level) / INDENT_SIZE]

    if !inblock.nil?
      ##============================================================##
      ## Inside a block: the line is added to the current key's lines
      ##============================================================##
      current_key = last_keys.last
      parent_keys = last_keys[0..-2]
      result      = parent_keys.reduce(nested_hash) {|node, k| node[k] }
      result[current_key][1] << line.strip
    elsif line.gsub("#{key}:", "").strip.start_with?("|")
      ##============================================================##
      ## Start of a block. Only "|" is looked for: clean_yml has
      ## already rewritten ">" blocks as "|"
      ##============================================================##
      inblock     = indent_level
      block_type  = line.gsub("#{key}:", "").strip
      result      = last_keys.reduce(nested_hash) {|node, k| node[k] }
      result[key] = [block_type, []]
      last_keys << key
    else
      ##============================================================##
      ## Regular "key: value" line, or "key:" opening a nested hash
      ##============================================================##
      value  = split[1].to_s.strip
      result = last_keys.reduce(nested_hash) {|node, k| node[k] }
      if value.empty?
        result[key] = {}
        last_keys << key
      else
        result[key] = value.strip == "null" ? nil : value
      end
    end
  end

  ##============================================================##
  ## Second pass: each [block_type, lines] array becomes a string
  ## |   with final newline
  ## |4  with final newline and indentation of 4
  ## |-  without final newline
  ## |4- without final newline and indentation of 4
  ##============================================================##
  deep_transform_values(nested_hash) do |value|
    if value.is_a?(Array)
      style_type  = value[0]
      indent_supp = style_type.scan(/\d+/).first&.to_i || 0
      indent_supp = [indent_supp - INDENT_SIZE, 0].max
      value[1]    = value[1].map {|l| "#{SPACE * indent_supp}#{l}" }
      text        = value[1].join(NEWLINE)
      modifier    = style_type[-1]

      case modifier
      when "+"
        text << NEWLINE unless text.end_with?(NEWLINE)
      when "-"
        text.chomp!
      else
        text << NEWLINE unless text.end_with?(NEWLINE)
      end
      text
    else
      value
    end
  end
end
695
+
696
+
697
+ end
698
+ end
data/lib/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ImmosquareYaml
4
+ VERSION = "0.1.0"
5
+ end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: immosquare-yaml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - IMMO SQUARE
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2023-09-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: iso-639
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.2.5
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.2.5
27
+ description: IMMOSQUARE-YAML is a lightweight and efficient YAML parser designed to
28
+ facilitate the handling of real estate data in YAML format, offering streamlined
29
+ processes and a simplified user experience.
30
+ email:
31
+ - jules@immosquare.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - lib/immosquare-yaml.rb
37
+ - lib/version.rb
38
+ homepage: https://github.com/IMMOSQUARE/immosquare-yaml
39
+ licenses:
40
+ - MIT
41
+ metadata: {}
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 2.6.0
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ requirements: []
57
+ rubygems_version: 3.1.6
58
+ signing_key:
59
+ specification_version: 4
60
+ summary: A YAML parser tailored for real estate solutions.
61
+ test_files: []