immosquare-yaml 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/immosquare-yaml.rb +698 -0
  3. data/lib/version.rb +5 -0
  4. metadata +61 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c14b630cc0f5e121db85ea66b43d18c0d59ae69e2f99db80bdeaf7077b35c5ca
4
+ data.tar.gz: 06073e555050eb9d7971135fcdab6b6ec1d00f29f283c5ab68153c5306a0c8d9
5
+ SHA512:
6
+ metadata.gz: da0e7f956a8cf9e1f5a2bac7d03114961d2b2d2b31d41c40b54fe17d853102bd97012cca89d7425c80fd1b04498a847c10641e65690223ea34aec9fa5c1b5798
7
+ data.tar.gz: 463d134187834be1649dddf650615f440712282d07448b9254de0d57b4c8d64419091a114bc2c4d23630a4cb3c28408202ebc34e7d1b91a30cf6a7ef78e2975e
@@ -0,0 +1,698 @@
1
+ module ImmosquareYaml
2
+ class << self
3
+
4
##============================================================##
## Formatting primitives shared by the cleaner, parser and dumper
##============================================================##
INDENT_SIZE         = 2
SPACE               = " ".freeze
NEWLINE             = "\n".freeze
SIMPLE_QUOTE        = "'".freeze
DOUBLE_QUOTE        = '"'.freeze
DOUBLE_SIMPLE_QUOTE = "''".freeze

##============================================================##
## Typographic quotes (and the doubled simple quote) that
## clean_value replaces with a plain simple quote
##============================================================##
WEIRD_QUOTES_REGEX = /‘|’|“|”|‛|‚|„|‟|#{Regexp.quote(DOUBLE_SIMPLE_QUOTE)}/.freeze

##============================================================##
## A value starting with one of these characters gets quoted
##============================================================##
YML_SPECIAL_CHARS = '-`{}|[]>:"\'*=%,!?&#@'.chars.freeze

##============================================================##
## YAML boolean-like words, in lower case, capitalized and
## upper case (in that order); they are quoted when used as a
## key or as a value
##============================================================##
RESERVED_KEYS = [:itself, :capitalize, :upcase].flat_map do |variant|
  ["yes", "no", "on", "off", "true", "false"].map(&variant)
end.freeze
17
+
18
##===========================================================================##
## Cleans a YAML file in place: the file is deep-cleaned, parsed into a hash
## (sorted or not, depending on the :sort option), dumped back to YAML and
## written over the original file.
##
## Params:
## +file_path+:: Path to the YAML file that needs to be cleaned.
## +options+:: A hash of options where :sort controls whether the output should be sorted (default is true).
##
## Returns:
## Boolean indicating the success (true) or failure (false) of the operation.
## Any StandardError is rescued, its message is printed, and false is returned.
##===========================================================================##
def clean(file_path, **options)
  ##============================================================##
  ## Default options
  ##============================================================##
  options = {:sort => true}.merge(options)

  begin
    raise("File not found") if !File.exist?(file_path)

    ##===========================================================================##
    ## First cleaning pass (parse runs its own pass as well; both are kept
    ## so the number of cleaning passes applied to the file is unchanged).
    ##===========================================================================##
    clean_yml(file_path)

    ##===========================================================================##
    ## The :sort option is forwarded to parse: parse sorts by default, so
    ## calling it without the option would sort the output even when the
    ## caller asked for :sort => false.
    ##===========================================================================##
    yaml_final = parse(file_path, :sort => options[:sort])

    ##===========================================================================##
    ## parse reports its own failures by returning false; stop here with an
    ## explicit message instead of failing later on an unrelated method call.
    ##===========================================================================##
    raise("Unable to parse #{file_path}") if yaml_final == false

    File.write(file_path, dump(yaml_final))
    true
  rescue StandardError => e
    puts(e.message)
    false
  end
end
56
+
57
##==========================================================================##
## Reads a YAML file and returns its content as a nested hash.
## The file is first deep-cleaned (and therefore rewritten on disk), then
## converted to a hash which is sorted by key unless :sort is falsy.
##
## Params:
## +file_path+:: Path to the YAML file that needs to be parsed.
## +options+:: A hash of options where :sort controls whether the output should be sorted (default is true).
##
## Returns:
## A hash representation of the YAML file, or false if an error occurs
## (the error message is printed).
##==========================================================================##
def parse(file_path, **options)
  sort = options.fetch(:sort, true)

  raise("File not found") unless File.exist?(file_path)

  clean_yml(file_path)
  parsed = parse_xml(file_path)
  sort ? sort_by_key(parsed, sort) : parsed
rescue StandardError => e
  puts(e.message)
  false
end
85
+
86
##===========================================================================##
## Builds a YAML document (as a String) from a nested hash.
## Keys are passed through clean_key; values are written as:
##   nil    => "key: null"
##   String => "key: value", or a literal block ("|", "|-", "|N"...) when the
##             string contains a real newline or the two characters \n
##   Hash   => "key:" followed by the nested pairs, indented by INDENT_SIZE
##   other  => "key: <value.to_s>" (Integer, Float, true, false, ...)
##
## Params:
## +hash+:: The input hash to be converted into a YAML representation.
## +lines+:: An array receiving the constructed lines (default is an empty array).
## +indent+:: The current indentation level (default is 0).
##
## Returns:
## A string representing the YAML representation of the input hash,
## terminated by a newline.
##===========================================================================##
def dump(hash, lines = [], indent = 0)
  append_dump_lines(hash, lines, indent)

  ##===========================================================================##
  ## Finalizing the construction by adding a newline at the end and
  ## removing whitespace from empty lines. This is done on a copy, once,
  ## so +lines+ only ever contains the YAML lines themselves.
  ##===========================================================================##
  (lines + [""]).map {|l| l.strip.empty? ? "" : l }.join("\n")
end

##===========================================================================##
## Appends to +lines+ the YAML lines of every pair of +hash+, recursing into
## nested hashes with a deeper indentation. Returns +lines+.
##===========================================================================##
def append_dump_lines(hash, lines, indent)
  hash.each do |key, value|
    ##===========================================================================##
    ## Preparing the key with the proper indentation before identifying
    ## the type of the value to handle it appropriately.
    ##===========================================================================##
    line = "#{SPACE * indent}#{clean_key(key)}:"

    case value
    when nil
      lines << "#{line} null"
    when Hash
      lines << line
      append_dump_lines(value, lines, indent + INDENT_SIZE)
    when String
      if value.include?(NEWLINE) || value.include?('\n')
        ##=============================================================##
        ## Block header: "|", then the indentation indicator when the
        ## text starts with spaces, then "-" when the text has no final
        ## newline (the "+" is not displayed because keeping the final
        ## newline is the default behavior)
        ##=============================================================##
        line += "#{SPACE}|"
        indent_level = value[/\A */].size
        line += (indent_level + INDENT_SIZE).to_s if indent_level > 0
        line += "-" if !value.end_with?(NEWLINE)
        lines << line

        ##=============================================================##
        ## We split on the 2 types of line breaks
        ##=============================================================##
        value.split(/\\n|\n/).each do |subline|
          lines << "#{SPACE * (indent + INDENT_SIZE)}#{subline}"
        end
      else
        lines << "#{line}#{SPACE}#{value}"
      end
    else
      ##=============================================================##
      ## Any other value (Integer, Float, true, false...) is written
      ## with its to_s form instead of silently dropping the pair
      ##=============================================================##
      lines << "#{line}#{SPACE}#{value}"
    end
  end
  lines
end
private :append_dump_lines
150
+
151
+
152
+ private
153
+
154
##===========================================================================##
## Rewrites the file so that its content is followed by exactly one empty
## line: trailing blank lines are removed, the last content line is
## terminated by a newline, and a single newline-only line is appended.
## An empty file is handled as well (it becomes a single newline).
##
## Params:
## +file_path+:: The path to the file to be normalized.
##
## Returns:
## The total number of lines in the normalized file, the appended
## empty line included.
##===========================================================================##
def normalize_last_line(file_path)
  ##============================================================##
  ## Read all lines from the file
  ## https://gist.github.com/guilhermesimoes/d69e547884e556c3dc95
  ##============================================================##
  lines = File.read(file_path).lines

  ##============================================================##
  ## Ensure the last line ends with a newline character
  ## (nothing to do for an empty file, which has no last line)
  ##============================================================##
  lines[-1] = "#{lines[-1]}#{NEWLINE}" unless lines.empty? || lines[-1].end_with?(NEWLINE)

  ##===========================================================================##
  ## Remove all trailing empty lines at the end of the file
  ##===========================================================================##
  lines.pop while lines.last && lines.last.strip.empty?

  ##===========================================================================##
  ## Append a newline at the end to maintain the file structure
  ##===========================================================================##
  lines << NEWLINE

  ##===========================================================================##
  ## Write the modified lines back to the file
  ##===========================================================================##
  File.write(file_path, lines.join)

  ##===========================================================================##
  ## Return the total number of lines in the modified file
  ##===========================================================================##
  lines.size
end
199
+
200
+ ##============================================================##
201
+ ## Deeply cleans the YAML file at +file_path+ in place: the file is read line by line, keys, values, multi-line blocks and quoted multi-line values are normalized, and the result is written back with File.write
202
+ ##============================================================##
203
+ def clean_yml(file_path)
204
+ lines = []
205
+ inblock_indent = nil
206
+ weirdblock_indent = nil
207
+ inblock = false
208
+ weirdblock = false
209
+ line_index = 1
210
+
211
+ ##===================================================================================#
212
+ ## First, we normalize the file by ensuring it always ends with an empty line
213
+ ## This also allows us to get the total number of lines in the file,
214
+ ## helping us to determine when we are processing the last line
215
+ ###===================================================================================#
216
+ line_count = normalize_last_line(file_path)
217
+
218
+
219
+ File.foreach(file_path) do |current_line|
220
+ last_line = line_index == line_count
221
+
222
+ ##===================================================================================#
223
+ ## Collapsing every whitespace run that follows a non-space character into a single space (leading indentation is left untouched)
224
+ ##===================================================================================#
225
+ current_line = current_line.to_s.gsub(/(?<=\S)\s+/, SPACE)
226
+
227
+ ##============================================================##
228
+ ## Trimming potential whitespace characters from the end of the line
229
+ ##============================================================##
230
+ current_line = current_line.rstrip
231
+
232
+
233
+ ##===================================================================================#
234
+ ## Detecting blank lines: a blank line is skipped unless we are inside a block or on the last line
235
+ ## NOTE(review): `next` also skips the `line_index += 1` at the end of the loop, so skipped blank lines never advance line_index although line_count includes them - confirm last_line is still detected as intended
236
+ ##===================================================================================#
237
+ blank_line = current_line.gsub(NEWLINE, "").empty?
238
+ next if !(last_line || inblock || !blank_line)
239
+
240
+ ##============================================================##
241
+ ## Identifying the indentation level of the current line and whether a pending block / weirdblock ends on this line
242
+ ##============================================================##
243
+ last_inblock = inblock
244
+ indent_level = current_line[/\A */].size
245
+ need_to_clean_prev_inblock = inblock == true && ((!blank_line && indent_level <= inblock_indent) || last_line)
246
+ need_to_clen_prev_weirdblock = weirdblock == true && (indent_level <= weirdblock_indent || last_line)
247
+
248
+ ##===================================================================================#
249
+ ## Handling the exit from a block:
250
+ ## if we are exiting a block, we clean the entire block
251
+ ##===================================================================================#
252
+ if need_to_clean_prev_inblock
253
+ inblock = false
254
+ ##============================================================##
255
+ ## Extracting the entire block by walking back through the stored lines while consecutive lines share the same indentation
256
+ ## Subsequently determining the type of block we are in and cleaning accordingly
257
+ ##============================================================##
258
+ i = -1
259
+ block_indent = lines[i][/\A */].size
260
+ block_lines = [lines[i].lstrip]
261
+ while lines[i][/\A */].size == lines[i - 1][/\A */].size
262
+ block_lines << lines[i - 1].lstrip
263
+ i -= 1
264
+ end
265
+
266
+ ##============================================================##
267
+ ## Handling different types of blocks (literal blocks "|",
268
+ ## folded blocks ">", etc.)
269
+ ## and applying the respective formatting strategies based on
270
+ ## block type and additional indent specified
271
+ ##
272
+ ## | => Literal block: line breaks are kept exactly as
273
+ ## they are given in the text block.
274
+ ## Final new line: A new line is added to the
275
+ ## end of text.
276
+ ## |- => Literal block: line breaks are kept exactly as
277
+ ## they are given in the text block.
278
+ ## Final new line: the final line break is removed,
279
+ ## unlike the option |
280
+ ## > => Folded block: each new line is replaced with a space,
281
+ ## transforming the block of text into a single line.
282
+ ## However, it preserves newlines that follow an empty line.
283
+ ## Final new line: A new line is added at the end of the text.
284
+ ## ===
285
+ ## An indentation indicator is also possible, e.g. |4- or |4+ (indentation of 4)
286
+ ##============================================================##
287
+ block_lines = block_lines.reverse
288
+ block_type = lines[i - 1].split(": ").last
289
+ indent_suppl = block_type.scan(/\d+/).first.to_i
290
+ indent_suppl = indent_suppl > 0 ? indent_suppl - INDENT_SIZE : 0
291
+ case block_type[0]
292
+ when ">"
293
+ lines[i - 1] = lines[i - 1].gsub(">", "|")
294
+ lines[i] = "#{SPACE * (block_indent + indent_suppl)}#{clean_value(block_lines.join(SPACE))}"
295
+ ((i + 1)..-1).to_a.size.times { lines.pop }
296
+ else
297
+ split = clean_value(block_lines.join(NEWLINE), false).split(NEWLINE)
298
+ (i..-1).each do |ii|
299
+ lines[ii] = "#{SPACE * (block_indent + indent_suppl)}#{split.shift}"
300
+ end
301
+ end
302
+ end
303
+
304
+ ##===================================================================================#
305
+ ## Handling 'weirdblocks': cases where multi-line values are enclosed in quotes,
306
+ ## which should actually be single-line values
307
+ ## key: "
308
+ ## line1
309
+ ## line2
310
+ ## line3"
311
+ ## key: '
312
+ ## line1
313
+ ## line2
314
+ ## line3'
315
+ ##============================================================##
316
+ if need_to_clen_prev_weirdblock
317
+ weirdblock = false
318
+ key, value = lines[-1].split(":", 2)
319
+ lines[-1] = "#{key}: #{clean_value(value)}"
320
+ end
321
+
322
+ ##===================================================================================#
323
+ ## Handling keys without values: if the previous line ends with a colon (:) and is not
324
+ ## followed by a value, we assign 'null' as the value
325
+ ##===================================================================================#
326
+ if inblock == false && weirdblock == false && lines[-1] && lines[-1].end_with?(":") && last_inblock == false
327
+ prev_indent = lines[-1][/\A */].size
328
+ lines[-1] += " null" if prev_indent >= indent_level
329
+ end
330
+
331
+ ##============================================================##
332
+ ## Splitting the current line into key and value parts for further processing
333
+ ## We split on ":" and not on ": " because there is no space after the colon when the line is
334
+ ## just a key.. but we have a newline
335
+ ## fr: => ["fr", "\n"]
336
+ ##============================================================##
337
+ split = inblock || weirdblock ? [current_line] : current_line.strip.split(":", 2)
338
+ key = inblock || weirdblock ? nil : split[0].to_s.strip
339
+
340
+ ##===================================================================================#
341
+ ## Line processing based on various conditions such as being inside a block,
342
+ ## starting with a comment symbol (#), or being a part of a 'weirdblock'
343
+ ## Each case has its specific line cleaning strategy
344
+ ## ----
345
+ ## If the line is commented out, we keep it as is (minus newlines)
346
+ ##============================================================##
347
+ if current_line.lstrip.start_with?("#")
348
+ lines << current_line.gsub(NEWLINE, "")
349
+ ##============================================================##
350
+ ## If we are in a block (multiline > | or |-), we clean
351
+ ## the line because it can start with spaces tabs etc.
352
+ ## and put it with the block indenter
353
+ ##============================================================##
354
+ elsif inblock == true
355
+ current_line = current_line.gsub(NEWLINE, "").strip
356
+ lines << "#{SPACE * (inblock_indent + INDENT_SIZE)}#{current_line}"
357
+ ##============================================================##
358
+ ## if the line ends with a multi-line character and we have a key.
359
+ ## we start a block
360
+ ## The regex works as follows:
361
+ ##=========================================================
362
+ ## \S+ : One or more non-space characters (the end of the key).
363
+ ## : : Matches the string ": " literally (space included).
364
+ ## [>|] : Matches a single character that is either ">" or "|".
365
+ ## (\d*) : Capture group that matches zero or more digits (0-9).
366
+ ## [-+]? : Matches zero or one character that is either "-" or "+".
367
+ ## $ : Matches the end of the line/string.
368
+ ##============================================================##
369
+ elsif current_line.rstrip.match?(/\S+: [>|](\d*)[-+]?$/)
370
+ lines << current_line.gsub(NEWLINE, "")
371
+ inblock_indent = indent_level
372
+ inblock = true
373
+ ##============================================================##
374
+ ## We are in the scenario of a multiline block
375
+ ## but without > | or |- at the end of the line
376
+ ## which should actually be inline.
377
+ ## mykey:
378
+ ## line1
379
+ ## line2
380
+ ## line3
381
+ ## becomes => mykey: line1 line2 line3
382
+ ##============================================================##
383
+ elsif split.size < 2
384
+ lines[-1] = (lines[-1] + " #{current_line.lstrip}").gsub(NEWLINE, "")
385
+ ##============================================================##
386
+ ## Otherwise we are in the case of a classic line
387
+ ## key: value or key: without value
388
+ ##============================================================##
389
+ else
390
+ key = clean_key(key)
391
+ spaces = (SPACE * indent_level).to_s
392
+ current_line = "#{spaces}#{key}:"
393
+
394
+ if !split[1].empty?
395
+ value = split[1].to_s.strip
396
+
397
+ ##============================================================##
398
+ ## We are in a multiline block which should be an inline
399
+ ## if the value starts with a quote (" or ') and the number of that quote character is odd
400
+ ##============================================================##
401
+ if (value.start_with?(DOUBLE_QUOTE) && value.count(DOUBLE_QUOTE).odd?) || (value.start_with?(SIMPLE_QUOTE) && value.count(SIMPLE_QUOTE).odd?)
402
+ weirdblock = true
403
+ weirdblock_indent = indent_level
404
+ else
405
+ value = clean_value(split[1])
406
+ end
407
+ current_line += " #{value}"
408
+ end
409
+
410
+ ##============================================================##
411
+ ## Merging the cleaned key and value to form the cleaned row
412
+ ##============================================================##
413
+ lines << current_line
414
+ end
415
+
416
+ ##============================================================##
417
+ ## We increment the line number
418
+ ##============================================================##
419
+ line_index += 1
420
+ end
421
+
422
+ ##============================================================##
423
+ ## We finish the file with a newline and we delete
424
+ ## spaces on "empty" lines + double spaces
425
+ ## with the same technique as above
426
+ ##============================================================##
427
+ lines += [""]
428
+ lines = lines.map {|l| (l.strip.empty? ? "" : l).to_s.gsub(/(?<=\S)\s+/, SPACE) }
429
+ File.write(file_path, lines.join(NEWLINE))
430
+ end
431
+
432
##============================================================##
## clean_key Function
## Purpose: Clean up and standardize YAML keys
##============================================================##
## Strategy:
## 1. Convert the key to a string (it may be an integer).
## 2. Check whether the key, as given, is an integer.
## 3. Remove the surrounding quotes if the same quote character
##    opens and closes the key.
## 4. Wrap the key in double quotes if it is a reserved word or
##    if it was detected as an integer at step 2.
##
## Regular Expression Explanation:
## /\A(['“”‘’"]?)(.*)\1\z/
## \A: Matches the start of the string.
## (['“”‘’"]?): Optionally captures one quote character.
## (.*): Captures the content between the quotes.
## \1: Requires the same character as the opening one at the end.
## \z: Matches the end of the string.
## The replacement '\2' keeps only the content between the quotes.
##
## The reserved-word test uses Array#include? rather than
## Object#in?, which is an ActiveSupport extension and raises
## NoMethodError when ActiveSupport is not loaded.
##
## Params:
## +key+:: The key to clean (any object responding to to_s).
##
## Returns:
## The cleaned key as a String.
##============================================================##
def clean_key(key)
  ##============================================================##
  ## Convert key to string to avoid issues with gsub operations
  ## + Check if the key is an integer (before any quote removal)
  ##============================================================##
  key    = key.to_s
  is_int = key.match?(/\A[-+]?\d+\z/)

  ##============================================================##
  ## Remove surrounding quotes from the key
  ## Re-add quotes if the key is in the list of reserved keys or is an integer
  ##============================================================##
  key = key.gsub(/\A(['“”‘’"]?)(.*)\1\z/, '\2')
  key = "\"#{key}\"" if RESERVED_KEYS.include?(key) || is_int
  key
end
470
+
471
##============================================================##
## clean_value Function
## Purpose: Sanitize and standardize YAML values
##
## Params:
## +value+:: The raw value (converted with to_s).
## +with_quotes_verif+:: When exactly true (default), the value is
##   wrapped in double quotes if it needs them. In YAML "inblock"
##   scenarios callers pass false, as quoting is not needed there.
##
## Returns:
## The cleaned value as a String. A blank value is always
## returned as a quoted string ("" or " ").
##
## The blank and reserved-word tests use core Ruby only:
## Object#present? and Object#in? are ActiveSupport extensions and
## raise NoMethodError when ActiveSupport is not loaded.
##============================================================##
def clean_value(value, with_quotes_verif = true)
  ##============================================================##
  ## Convert value to string to prevent issues in subsequent operations
  ##============================================================##
  value = value.to_s

  ##============================================================##
  ## Remove the newline character at the end of the value if present.
  ## This is done prior to the strip operation to handle scenarios
  ## where the value ends with a space followed by a newline.
  ##============================================================##
  value = value[0..-2] if value.end_with?(NEWLINE)

  ##============================================================##
  ## Clean up the value:
  ## - Remove tabs (\t), carriage returns (\r), form feeds (\f)
  ##   and vertical tabs (\v). We keep the \n
  ## - Replace multiple spaces with a single space
  ## - Trim leading and trailing spaces
  ## - Replace special quotes with standard single quotes
  ##============================================================##
  value = value.gsub(/[\t\r\f\v]+/, "")
  value = value.gsub(/ {2,}/, SPACE)
  value = value.strip
  value = value.gsub(WEIRD_QUOTES_REGEX, SIMPLE_QUOTE)

  ##============================================================##
  ## Remove quotes surrounding the value if they are present.
  ## They will be re-added later if necessary.
  ##============================================================##
  double_quoted = value.start_with?(DOUBLE_QUOTE) && value.end_with?(DOUBLE_QUOTE)
  simple_quoted = value.start_with?(SIMPLE_QUOTE) && value.end_with?(SIMPLE_QUOTE)
  value = value[1..-2] if double_quoted || simple_quoted

  ##============================================================##
  ## Convert emoji representations such as \U0001F600 to their respective emojis.
  ##============================================================##
  value = value.gsub(/\\U([0-9A-Fa-f]{8})/) { [::Regexp.last_match(1).to_i(16)].pack("U*") }

  ##=============================================================##
  ## A blank value (empty or whitespace only, as it can be after the
  ## quote removal above) is always quoted.
  ## Otherwise the value must be surrounded by quotes if:
  ## value.include?(": ") => key: text with: here
  ## value.include?(" #") => key: text with # here
  ## value.include?(NEWLINE) => key: Line 1\nLine 2\nLine 3
  ## value.include?('\n') => key: Line 1"\n"Line 2"\n"Line 3
  ## value.start_with?(*YML_SPECIAL_CHARS) => key: @text
  ## value.end_with?(":") => key: text:
  ## RESERVED_KEYS.include?(value) => key: YES
  ## value.start_with?(SPACE) / value.end_with?(SPACE) => value with an outer space
  ##=============================================================##
  blank        = value.match?(/\A[[:space:]]*\z/)
  needs_quotes = value.include?(": ") ||
                 value.include?(" #") ||
                 value.include?(NEWLINE) ||
                 value.include?('\n') ||
                 value.start_with?(*YML_SPECIAL_CHARS) ||
                 value.end_with?(":") ||
                 RESERVED_KEYS.include?(value) ||
                 value.start_with?(SPACE) ||
                 value.end_with?(SPACE)

  value = "\"#{value}\"" if blank || (needs_quotes && with_quotes_verif == true)
  value
end
561
+
562
##============================================================##
## Returns a new hash with the same keys where every non-hash
## value, at any depth, is replaced by the result of the block.
## Nested hashes are walked recursively.
##============================================================##
def deep_transform_values(hash, &block)
  hash.transform_values do |entry|
    next deep_transform_values(entry, &block) if entry.is_a?(Hash)

    block.call(entry)
  end
end
574
+
575
##============================================================##
## sort_by_key Function
## Purpose: Return a new hash whose keys are sorted. Without a
## block, keys are compared as downcased strings with their
## double quotes removed. With +recursive+ set, nested hashes
## are sorted the same way (with the same comparison block).
##============================================================##
def sort_by_key(hash, recursive = false, &block)
  comparator = block || proc do |left, right|
    left.to_s.downcase.gsub(DOUBLE_QUOTE, "") <=> right.to_s.downcase.gsub(DOUBLE_QUOTE, "")
  end

  sorted = {}
  hash.keys.sort(&comparator).each do |key|
    value = hash[key]
    value = sort_by_key(value, true, &comparator) if recursive && value.is_a?(Hash)
    sorted[key] = value
  end
  sorted
end
587
+
588
+ ##============================================================##
589
+ ## parse_xml Function
590
+ ## Purpose: Parse a YAML file into a nested hash representation (despite its name, this method does not read XML).
591
+ ##
592
+ ## This method reads through the YAML file line by line and creates a
593
+ ## nested hash representation based on the indentation and content of each line.
594
+ ##============================================================##
595
+ def parse_xml(file_path)
596
+ nested_hash = {}
597
+ inblock = nil
598
+ last_keys = []
599
+
600
+ ##============================================================##
601
+ ## We go over each line of the file to create a hash.
602
+ ## We put the multiline blocks in an array to recover
603
+ ## all the values and the formatting type, then we go over
604
+ ## each of these arrays afterwards to transform them
605
+ ## into the corresponding string
606
+ ##============================================================##
607
+ File.foreach(file_path) do |line|
608
+ ##============================================================##
609
+ ## Determine the indentation level of the line.
610
+ ##============================================================##
611
+ indent_level = line[/\A */].size
612
+
613
+ ##============================================================##
614
+ ## Check for blank lines (which can be present within multi-line blocks)
615
+ ##============================================================##
616
+ blank_line = line.gsub(NEWLINE, "").empty?
617
+
618
+ ##============================================================##
619
+ ## Split the line into key and value, and leave the current block when a non-blank line is not indented deeper than the block header.
620
+ ##============================================================##
621
+ split = line.strip.split(":", 2)
622
+ key = split[0].to_s.strip
623
+ inblock = nil if !inblock.nil? && !blank_line && indent_level <= inblock
624
+
625
+
626
+ ##============================================================##
627
+ ## Set the key level based on indentation (NOTE(review): on a blank line this reads inblock, which raises if inblock is nil - presumably clean_yml never leaves blank lines outside blocks; confirm)
628
+ ##============================================================##
629
+ last_keys = last_keys[0, (blank_line ? inblock + INDENT_SIZE : indent_level) / INDENT_SIZE]
630
+
631
+ ##============================================================##
632
+ ## If inside a multi-line block, append the line to the current key's value
633
+ ##============================================================##
634
+ if !inblock.nil?
635
+ current_key = last_keys.last
636
+ parent_keys = last_keys[0..-2]
637
+ result = parent_keys.reduce(nested_hash) {|hash, k| hash[k] }
638
+ result[current_key][1] << line.strip
639
+ ##============================================================##
640
+ ## Handle multi-line key declarations.
641
+ ## We no longer have the >
642
+ ## because it is transformed into | by clean_yml
643
+ ##============================================================##
644
+ elsif line.gsub("#{key}:", "").strip.start_with?("|")
645
+ inblock = indent_level
646
+ block_type = line.gsub("#{key}:", "").strip
647
+ result = last_keys.reduce(nested_hash) {|hash, k| hash[k] }
648
+ result[key] = [block_type, []]
649
+ last_keys << key
650
+ ##============================================================##
651
+ ## Handle regular key-value pair declarations: a key without value opens a nested hash, the value "null" becomes nil
652
+ ##============================================================##
653
+ else
654
+ value = split[1].to_s.strip
655
+ result = last_keys.reduce(nested_hash) {|hash, k| hash[k] }
656
+ if value.empty?
657
+ result[key] = {}
658
+ last_keys << key
659
+ else
660
+ result[key] = value.strip == "null" ? nil : value
661
+ end
662
+ end
663
+ end
664
+
665
+ ##============================================================##
666
+ ## We go over each value and convert the [block_type, lines] arrays built above into strings
667
+ ## | with final newline
668
+ ## |4 with newline and indentation of 4
669
+ ## |- without newline
670
+ ## |4- without newline and indentation of 4
671
+ ##============================================================##
672
+ deep_transform_values(nested_hash) do |value|
673
+ if value.is_a?(Array)
674
+ style_type = value[0]
675
+ indent_supp = style_type.scan(/\d+/).first&.to_i || 0
676
+ indent_supp = [indent_supp - INDENT_SIZE, 0].max
677
+ value[1] = value[1].map {|l| "#{SPACE * indent_supp}#{l}" }
678
+ text = value[1].join(NEWLINE)
679
+ modifier = style_type[-1]
680
+
681
+ case modifier
682
+ when "+"
683
+ text << NEWLINE unless text.end_with?(NEWLINE)
684
+ when "-"
685
+ text.chomp!
686
+ else
687
+ text << NEWLINE unless text.end_with?(NEWLINE)
688
+ end
689
+ text
690
+ else
691
+ value
692
+ end
693
+ end
694
+ end
695
+
696
+
697
+ end
698
+ end
data/lib/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Namespace of the immosquare-yaml gem.
module ImmosquareYaml
  # Version string of the gem.
  VERSION = "0.1.0"
end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: immosquare-yaml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - IMMO SQUARE
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2023-09-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: iso-639
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.2.5
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.2.5
27
+ description: IMMOSQUARE-YAML is a lightweight and efficient YAML parser designed to
28
+ facilitate the handling of real estate data in YAML format, offering streamlined
29
+ processes and a simplified user experience.
30
+ email:
31
+ - jules@immosquare.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - lib/immosquare-yaml.rb
37
+ - lib/version.rb
38
+ homepage: https://github.com/IMMOSQUARE/immosquare-yaml
39
+ licenses:
40
+ - MIT
41
+ metadata: {}
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 2.6.0
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ requirements: []
57
+ rubygems_version: 3.1.6
58
+ signing_key:
59
+ specification_version: 4
60
+ summary: A YAML parser tailored for real estate solutions.
61
+ test_files: []