immosquare-yaml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/immosquare-yaml.rb +698 -0
- data/lib/version.rb +5 -0
- metadata +61 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c14b630cc0f5e121db85ea66b43d18c0d59ae69e2f99db80bdeaf7077b35c5ca
|
4
|
+
data.tar.gz: 06073e555050eb9d7971135fcdab6b6ec1d00f29f283c5ab68153c5306a0c8d9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: da0e7f956a8cf9e1f5a2bac7d03114961d2b2d2b31d41c40b54fe17d853102bd97012cca89d7425c80fd1b04498a847c10641e65690223ea34aec9fa5c1b5798
|
7
|
+
data.tar.gz: 463d134187834be1649dddf650615f440712282d07448b9254de0d57b4c8d64419091a114bc2c4d23630a4cb3c28408202ebc34e7d1b91a30cf6a7ef78e2975e
|
@@ -0,0 +1,698 @@
|
|
1
|
+
module ImmosquareYaml
|
2
|
+
class << self
|
3
|
+
|
4
|
+
## Number of spaces used for one indentation level.
INDENT_SIZE         = 2

## Frozen single-character helpers shared by the methods below.
SPACE               = " ".freeze
NEWLINE             = "\n".freeze
SIMPLE_QUOTE        = "'".freeze
DOUBLE_QUOTE        = '"'.freeze
DOUBLE_SIMPLE_QUOTE = (SIMPLE_QUOTE * 2).freeze

## Typographic quote characters, plus a doubled simple quote.
WEIRD_QUOTES_REGEX  = /‘|’|“|”|‛|‚|„|‟|#{Regexp.quote(DOUBLE_SIMPLE_QUOTE)}/.freeze

## Leading characters that force a value to be wrapped in double quotes.
YML_SPECIAL_CHARS   = [
  "-", "`", "{", "}", "|", "[", "]", ">", ":", '"',
  "'", "*", "=", "%", ",", "!", "?", "&", "#", "@"
].freeze

## Boolean-like words in lower-case, Capitalized and UPPER-CASE form
## (in that order), which must be quoted when used as keys or values.
RESERVED_KEYS       = [:itself, :capitalize, :upcase].flat_map do |variant|
  ["yes", "no", "on", "off", "true", "false"].map(&variant)
end.freeze
|
17
|
+
|
18
|
+
##===========================================================================##
|
19
|
+
## This method cleans a specified YAML file by processing it line by line.
|
20
|
+
## It executes a comprehensive cleaning routine, which involves parsing the
|
21
|
+
## YAML content to a hash, optionally sorting it, and then dumping it back
|
22
|
+
## to a YAML format.
|
23
|
+
##
|
24
|
+
## Params:
|
25
|
+
## +file_path+:: Path to the YAML file that needs to be cleaned.
|
26
|
+
## +options+:: A hash of options where :sort controls whether the output should be sorted (default is true).
|
27
|
+
##
|
28
|
+
## Returns:
|
29
|
+
## Boolean indicating the success (true) or failure (false) of the operation.
|
30
|
+
##===========================================================================##
|
31
|
+
def clean(file_path, **options)
  ##============================================================##
  ## Cleans the YAML file at +file_path+ in place: the file is
  ## cleaned line by line, parsed into a hash, optionally sorted,
  ## dumped back to YAML text and written over the original.
  ##
  ## Options:
  ## :sort => whether keys are sorted (default is true)
  ##
  ## Returns true on success, false if anything failed (the error
  ## message is printed).
  ##============================================================##
  options = {:sort => true}.merge(options)

  begin
    raise("File not found") if !File.exist?(file_path)

    ##============================================================##
    ## parse runs clean_yml again on its own; the explicit first pass
    ## is kept so the file receives the same two cleaning passes as
    ## before.
    ##============================================================##
    clean_yml(file_path)

    ##============================================================##
    ## The :sort option must be forwarded: parse sorts by default,
    ## so calling it without options made :sort => false ineffective.
    ##============================================================##
    yaml_final = parse(file_path, :sort => options[:sort])

    ##============================================================##
    ## parse returns false (after printing its own error) when it
    ## fails; stop here instead of handing false to dump.
    ##============================================================##
    return false if yaml_final == false

    File.write(file_path, dump(yaml_final))
    true
  rescue StandardError => e
    puts(e.message)
    false
  end
end
|
56
|
+
|
57
|
+
##==========================================================================##
|
58
|
+
## This method parses a specified YAML file, carrying out a preliminary
|
59
|
+
## cleaning operation to ensure a smooth parsing process. Following this,
|
60
|
+
## the cleaned file is transformed into a hash, which can optionally be sorted.
|
61
|
+
## It operates under the assumption that the file is properly structured.
|
62
|
+
##
|
63
|
+
## Params:
|
64
|
+
## +file_path+:: Path to the YAML file that needs to be parsed.
|
65
|
+
## +options+:: A hash of options where :sort controls whether the output should be sorted (default is true).
|
66
|
+
##
|
67
|
+
## Returns:
|
68
|
+
## A hash representation of the YAML file or false if an error occurs.
|
69
|
+
##==========================================================================##
|
70
|
+
def parse(file_path, **options)
  ##============================================================##
  ## Cleans the YAML file at +file_path+, then converts it to a
  ## hash. When the :sort option is truthy (default: true) the hash
  ## is passed through sort_by_key, with the option value used as
  ## its recursive flag.
  ## Any StandardError is printed and turned into a false return.
  ##============================================================##
  sort_mode = options.fetch(:sort, true)
  raise("File not found") unless File.exist?(file_path)

  clean_yml(file_path)
  parsed = parse_xml(file_path)
  sort_mode ? sort_by_key(parsed, sort_mode) : parsed
rescue StandardError => error
  puts(error.message)
  false
end
|
85
|
+
|
86
|
+
##===========================================================================##
|
87
|
+
## This method performs a dump operation to obtain a well-structured
|
88
|
+
## YAML file from a hash input. It iterates through each key-value pair in the
|
89
|
+
## hash and constructs a series of lines representing the YAML file, with
|
90
|
+
## appropriate indentations and handling of various value types including
|
91
|
+
## strings with newline characters.
|
92
|
+
##
|
93
|
+
## Params:
|
94
|
+
## +hash+:: The input hash to be converted into a YAML representation.
|
95
|
+
## +lines+:: An array to hold the constructed lines (default is an empty array).
|
96
|
+
## +indent+:: The current indentation level (default is 0).
|
97
|
+
##
|
98
|
+
## Returns:
|
99
|
+
## A string representing the YAML representation of the input hash.
|
100
|
+
##===========================================================================##
|
101
|
+
def dump(hash, lines = [], indent = 0)
  ##===========================================================================##
  ## Builds YAML text from +hash+.
  ##
  ## Params:
  ## +hash+::   The hash to convert.
  ## +lines+::  Array receiving the generated lines. It is appended to in
  ##            place, which is how nested calls contribute their lines.
  ## +indent+:: Number of spaces in front of the keys of this level.
  ##
  ## Returns:
  ## The lines accumulated so far joined with "\n", ending with a newline.
  ##===========================================================================##
  hash.each do |key, value|
    line = "#{SPACE * indent}#{clean_key(key)}:"

    case value
    when nil
      lines << "#{line} null"
    when String
      if value.include?(NEWLINE) || value.include?('\n')
        ##=============================================================##
        ## Multi-line string: emit a literal block header "|", followed
        ## by a number when the value starts with spaces, and by "-"
        ## when the value has no trailing newline.
        ##=============================================================##
        line += "#{SPACE}|"
        indent_level = value[/\A */].size
        line += (indent_level + INDENT_SIZE).to_s if indent_level > 0
        line += "-" if !value.end_with?(NEWLINE)
        lines << line

        ##=============================================================##
        ## Both real newlines and the two-character sequence \n are
        ## treated as line separators.
        ##=============================================================##
        value.split(/\\n|\n/).each do |subline|
          lines << "#{SPACE * (indent + INDENT_SIZE)}#{subline}"
        end
      else
        lines << "#{line}#{SPACE}#{value}"
      end
    when Hash
      lines << line
      dump(value, lines, indent + INDENT_SIZE)
    when Numeric, true, false
      ##=============================================================##
      ## Scalars that are not strings used to be dropped silently
      ## (the key disappeared from the output); write them inline.
      ##=============================================================##
      lines << "#{line}#{SPACE}#{value}"
    end
  end

  ##===========================================================================##
  ## Terminate with an empty line (giving the final newline) and blank out
  ## lines that contain only whitespace.
  ##===========================================================================##
  (lines + [""]).map {|l| l.strip.empty? ? "" : l }.join("\n")
end
|
150
|
+
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
##===========================================================================##
|
155
|
+
## This method ensures the file ends with a single newline, facilitating
|
156
|
+
## cleaner multi-line blocks. It operates by reading all lines of the file,
|
157
|
+
## removing any empty lines at the end, and then appending a newline.
|
158
|
+
## This guarantees the presence of a newline at the end, and also prevents
|
159
|
+
## multiple newlines from being present at the end.
|
160
|
+
##
|
161
|
+
## Params:
|
162
|
+
## +file_path+:: The path to the file to be normalized.
|
163
|
+
##
|
164
|
+
## Returns:
|
165
|
+
## The total number of lines in the normalized file.
|
166
|
+
##===========================================================================##
|
167
|
+
def normalize_last_line(file_path)
  ##===========================================================================##
  ## Rewrites the file so that trailing blank lines are removed and exactly
  ## one empty line follows the remaining content.
  ##
  ## Params:
  ## +file_path+:: Path of the file to rewrite.
  ##
  ## Returns:
  ## The number of lines written (the remaining lines plus the final empty one).
  ##===========================================================================##
  lines = File.read(file_path).lines

  ##============================================================##
  ## Make sure the last line is newline-terminated. An empty file
  ## has no last line, which previously raised on nil.
  ##============================================================##
  lines[-1] = "#{lines[-1]}#{NEWLINE}" if !lines.empty? && !lines[-1].end_with?(NEWLINE)

  ##============================================================##
  ## Drop every whitespace-only line at the end of the file
  ##============================================================##
  lines.pop while lines.last && lines.last.strip.empty?

  ##============================================================##
  ## Append the single empty line and write everything back
  ##============================================================##
  lines += [NEWLINE]
  File.write(file_path, lines.join)

  lines.size
end
|
199
|
+
|
200
|
+
##============================================================##
|
201
|
+
## Deeply cleans the specified YAML file
|
202
|
+
##============================================================##
|
203
|
+
def clean_yml(file_path)
  ##===================================================================================#
  ## Rewrites the YAML file at +file_path+ line by line: keys and values are
  ## normalized through clean_key / clean_value, block scalars (| and >) are
  ## re-indented, quoted values spread over several lines are merged onto one
  ## line, and keys left without a value receive "null".
  ##===================================================================================#
  lines             = []
  inblock_indent    = nil
  weirdblock_indent = nil
  inblock           = false
  weirdblock        = false
  line_index        = 1

  ##===================================================================================#
  ## normalize_last_line guarantees that the file ends with one empty line and
  ## returns the total number of lines, which lets us recognize the last line.
  ##===================================================================================#
  line_count = normalize_last_line(file_path)

  File.foreach(file_path) do |current_line|
    ##===================================================================================#
    ## The counter is advanced immediately: the "next" below skips blank lines,
    ## and incrementing at the end of the block (as before) left those lines
    ## uncounted, so last_line was never true once a blank line had been skipped.
    ##===================================================================================#
    last_line   = line_index == line_count
    line_index += 1

    ##===================================================================================#
    ## Collapse every whitespace run that follows a non-space character into a
    ## single space (leading indentation is untouched), then trim the end.
    ##===================================================================================#
    current_line = current_line.to_s.gsub(/(?<=\S)\s+/, SPACE)
    current_line = current_line.rstrip

    ##===================================================================================#
    ## Blank lines are skipped unless we are inside a block or on the last line
    ##===================================================================================#
    blank_line = current_line.gsub(NEWLINE, "").empty?
    next if !(last_line || inblock || !blank_line)

    ##============================================================##
    ## Indentation of the current line and detection of the end of
    ## the block / quoted multi-line value we were collecting
    ##============================================================##
    last_inblock                  = inblock
    indent_level                  = current_line[/\A */].size
    need_to_clean_prev_inblock    = inblock == true && ((!blank_line && indent_level <= inblock_indent) || last_line)
    need_to_clean_prev_weirdblock = weirdblock == true && (indent_level <= weirdblock_indent || last_line)

    ##===================================================================================#
    ## Leaving a block scalar: clean the whole block that was collected
    ##===================================================================================#
    if need_to_clean_prev_inblock
      inblock = false

      ##============================================================##
      ## Walk back over the collected lines while they share the
      ## same indentation to gather the lines of the block
      ##============================================================##
      i            = -1
      block_indent = lines[i][/\A */].size
      block_lines  = [lines[i].lstrip]
      while lines[i][/\A */].size == lines[i - 1][/\A */].size
        block_lines << lines[i - 1].lstrip
        i -= 1
      end

      ##============================================================##
      ## The line before the block holds its header:
      ##   |   literal block
      ##   |-  literal block, "-" chomping indicator
      ##   >   folded block
      ## A digit in the header (|4-, |4+ ...) is read as an
      ## indentation; INDENT_SIZE is subtracted from it to get the
      ## extra spaces to add in front of each block line.
      ## Folded blocks are joined into a single line and their header
      ## is rewritten as "|"; other blocks are cleaned without quote
      ## verification and keep one output line per input line.
      ##============================================================##
      block_lines  = block_lines.reverse
      block_type   = lines[i - 1].split(": ").last
      indent_suppl = block_type.scan(/\d+/).first.to_i
      indent_suppl = indent_suppl > 0 ? indent_suppl - INDENT_SIZE : 0
      case block_type[0]
      when ">"
        lines[i - 1] = lines[i - 1].gsub(">", "|")
        lines[i]     = "#{SPACE * (block_indent + indent_suppl)}#{clean_value(block_lines.join(SPACE))}"
        ((i + 1)..-1).to_a.size.times { lines.pop }
      else
        split = clean_value(block_lines.join(NEWLINE), false).split(NEWLINE)
        (i..-1).each do |ii|
          lines[ii] = "#{SPACE * (block_indent + indent_suppl)}#{split.shift}"
        end
      end
    end

    ##===================================================================================#
    ## Leaving a 'weirdblock': a quoted value written over several lines, which
    ## has been accumulated on the previous output line and is cleaned as a whole
    ##  key: "
    ##    line1
    ##    line2
    ##    line3"
    ##===================================================================================#
    if need_to_clean_prev_weirdblock
      weirdblock = false
      key, value = lines[-1].split(":", 2)
      lines[-1]  = "#{key}: #{clean_value(value)}"
    end

    ##===================================================================================#
    ## Keys without a value: if the previous line ends with ":" and the current line
    ## is not indented deeper than it, 'null' is written as its value
    ##===================================================================================#
    if inblock == false && weirdblock == false && lines[-1] && lines[-1].end_with?(":") && last_inblock == false
      prev_indent = lines[-1][/\A */].size
      lines[-1] += " null" if prev_indent >= indent_level
    end

    ##============================================================##
    ## Split on ":" (not ": ") because a bare key has no space
    ## after the colon
    ##============================================================##
    split = inblock || weirdblock ? [current_line] : current_line.strip.split(":", 2)
    key   = inblock || weirdblock ? nil : split[0].to_s.strip

    if current_line.lstrip.start_with?("#")
      ##============================================================##
      ## Comment line: kept as is
      ##============================================================##
      lines << current_line.gsub(NEWLINE, "")
    elsif inblock == true
      ##============================================================##
      ## Line inside a block scalar: strip it and re-indent it one
      ## level below the key that opened the block
      ##============================================================##
      current_line = current_line.gsub(NEWLINE, "").strip
      lines << "#{SPACE * (inblock_indent + INDENT_SIZE)}#{current_line}"
    elsif current_line.rstrip.match?(/\S+: [>|](\d*)[-+]?$/)
      ##============================================================##
      ## "key: |", "key: >", optionally followed by digits and a
      ## "-" or "+": a block scalar starts here
      ##============================================================##
      lines << current_line.gsub(NEWLINE, "")
      inblock_indent = indent_level
      inblock        = true
    elsif blank_line
      ##============================================================##
      ## Only the final empty line reaches this point. It exists to
      ## trigger the end-of-file handling above and must not be
      ## appended to the previous line (that added a trailing space,
      ## or raised when no line had been kept).
      ##============================================================##
    elsif split.size < 2
      ##============================================================##
      ## Continuation of a value written over several lines without
      ## a block indicator: append it to the previous line
      ##  mykey:
      ##    line1
      ##    line2
      ##  => mykey: line1 line2
      ##============================================================##
      lines[-1] = (lines[-1] + " #{current_line.lstrip}").gsub(NEWLINE, "")
    else
      ##============================================================##
      ## Classic line: "key: value" or "key:" without value
      ##============================================================##
      key          = clean_key(key)
      spaces       = (SPACE * indent_level).to_s
      current_line = "#{spaces}#{key}:"

      if !split[1].empty?
        value = split[1].to_s.strip

        ##============================================================##
        ## A value starting with a quote and containing an odd number
        ## of that quote is unterminated: it continues on the next
        ## lines and is cleaned once complete
        ##============================================================##
        if (value.start_with?(DOUBLE_QUOTE) && value.count(DOUBLE_QUOTE).odd?) || (value.start_with?(SIMPLE_QUOTE) && value.count(SIMPLE_QUOTE).odd?)
          weirdblock        = true
          weirdblock_indent = indent_level
        else
          value = clean_value(split[1])
        end
        current_line += " #{value}"
      end

      lines << current_line
    end
  end

  ##============================================================##
  ## End the file with a newline, blank out whitespace-only lines
  ## and collapse repeated whitespace once more
  ##============================================================##
  lines += [""]
  lines = lines.map {|l| (l.strip.empty? ? "" : l).to_s.gsub(/(?<=\S)\s+/, SPACE) }
  File.write(file_path, lines.join(NEWLINE))
end
|
431
|
+
|
432
|
+
##============================================================##
|
433
|
+
## clean_key Function
|
434
|
+
## Purpose: Clean up and standardize YAML keys
|
435
|
+
##============================================================##
|
436
|
+
## Strategy:
|
437
|
+
## 1. Forcefully convert the key to a string to handle gsub operations, especially if it's an integer.
|
438
|
+
## 2. Check if the key is an integer.
|
439
|
+
## 3. Remove quotes if they are present.
|
440
|
+
## 4. Re-add quotes if the key is a reserved word or an integer.
|
441
|
+
#
|
442
|
+
## Regular Expression Explanation:
|
443
|
+
## /\A(['“‘”’"])(.*)\1\z/
|
444
|
+
## \A: Matches the start of the string, ensuring our pattern begins at the very start of the string.
|
445
|
+
## (['“‘”’"]): Captures a single quote character. It matches any of the characters specified within the brackets.
|
446
|
+
## This includes various types of single and double quotes.
|
447
|
+
## (.*) : Captures zero or more of any character. It "captures" the entirety of the string between the quotes.
|
448
|
+
## \1: Refers back to the first captured group, ensuring the same type of quote character is found at the end.
|
449
|
+
## \z: Matches the end of the string, ensuring our pattern matches up to the very end.
|
450
|
+
#
|
451
|
+
## In the second argument of gsub, we use '\2' to refer back to the content captured by the second capture group.
|
452
|
+
## This allows us to fetch the string without the surrounding quotes.
|
453
|
+
##============================================================##
|
454
|
+
def clean_key(key)
  ##============================================================##
  ## Normalizes a YAML key.
  ## The key is converted to a string, one pair of matching
  ## surrounding quotes (straight or typographic) is removed, and
  ## double quotes are added back when the key is a reserved word
  ## or when the original text was an integer.
  ##============================================================##
  key    = key.to_s

  ##============================================================##
  ## The integer test is made on the text as given, i.e. before
  ## the surrounding quotes are removed
  ##============================================================##
  is_int = key.match?(/\A[-+]?\d+\z/)

  key = key.gsub(/\A(['“”‘’"]?)(.*)\1\z/, '\2')

  ##============================================================##
  ## Array#include? is used instead of Object#in?, which only
  ## exists when ActiveSupport is loaded
  ##============================================================##
  key = "\"#{key}\"" if RESERVED_KEYS.include?(key) || is_int
  key
end
|
470
|
+
|
471
|
+
##============================================================##
|
472
|
+
## clean_value Function
|
473
|
+
## Purpose: Sanitize and standardize YAML values
|
474
|
+
## In YAML "inblock" scenarios, there's no need to add quotes
|
475
|
+
## around values as it's inherently handled.
|
476
|
+
## ============================================================ ##
|
477
|
+
def clean_value(value, with_quotes_verif = true)
  ##============================================================##
  ## Normalizes a YAML value and returns it as a string.
  ##
  ## Params:
  ## +value+::             The raw value (converted with to_s).
  ## +with_quotes_verif+:: When true (default), a non-blank value is
  ##                       wrapped in double quotes if it needs them.
  ##                       Pass false for the content of block scalars.
  ##                       A blank value always becomes "".
  ##============================================================##
  value = value.to_s

  ##============================================================##
  ## Drop one trailing newline before stripping, then remove tabs,
  ## carriage returns, form feeds and vertical tabs (inner
  ## newlines are kept), collapse runs of spaces and trim
  ##============================================================##
  value = value[0..-2] if value.end_with?(NEWLINE)
  value = value.gsub(/[\t\r\f\v]+/, "")
  value = value.gsub(/ {2,}/, SPACE)
  value = value.strip

  ##============================================================##
  ## Typographic quotes and doubled simple quotes become a single
  ## simple quote
  ##============================================================##
  value = value.gsub(WEIRD_QUOTES_REGEX, SIMPLE_QUOTE)

  ##============================================================##
  ## Remove quotes surrounding the value; they are added back
  ## below when necessary
  ##============================================================##
  double_quoted = value.start_with?(DOUBLE_QUOTE) && value.end_with?(DOUBLE_QUOTE)
  simple_quoted = value.start_with?(SIMPLE_QUOTE) && value.end_with?(SIMPLE_QUOTE)
  value = value[1..-2] if double_quoted || simple_quoted

  ##============================================================##
  ## Convert escapes such as \U0001F600 to the actual character
  ##============================================================##
  value = value.gsub(/\\U([0-9A-Fa-f]{8})/) { [::Regexp.last_match(1).to_i(16)].pack("U*") }

  ##============================================================##
  ## Blank values are written as "". The regex reproduces
  ## ActiveSupport's String#blank? (this method used present?,
  ## which is not available in plain Ruby).
  ##============================================================##
  return "\"#{value}\"" if value.match?(/\A[[:space:]]*\z/)

  ##=============================================================##
  ## Cases where the value must be surrounded by quotes:
  ##  contains ": "                     => key: text with: here
  ##  contains " #"                     => key: text with # here
  ##  contains a newline or a literal \n
  ##  starts with a YAML special char   => key: @text
  ##  ends with ":"                     => key: text:
  ##  is a reserved word                => key: YES
  ##  starts or ends with a space
  ##=============================================================##
  needs_quotes = value.include?(": ") ||
                 value.include?(" #") ||
                 value.include?(NEWLINE) ||
                 value.include?('\n') ||
                 value.start_with?(*YML_SPECIAL_CHARS) ||
                 value.end_with?(":") ||
                 RESERVED_KEYS.include?(value) ||
                 value.start_with?(SPACE) ||
                 value.end_with?(SPACE)

  value = "\"#{value}\"" if needs_quotes && with_quotes_verif == true
  value
end
|
561
|
+
|
562
|
+
##============================================================##
|
563
|
+
## Deep transform values recursively
|
564
|
+
##============================================================##
|
565
|
+
def deep_transform_values(hash, &block)
  ##============================================================##
  ## Returns a new hash with the same keys where every value that
  ## is not a Hash has been replaced by the result of the block;
  ## nested hashes are processed the same way.
  ##============================================================##
  hash.transform_values do |entry|
    next deep_transform_values(entry, &block) if entry.is_a?(Hash)

    block.call(entry)
  end
end
|
574
|
+
|
575
|
+
##============================================================##
|
576
|
+
## sort_by_key Function
|
577
|
+
## Purpose: Sort a hash by its keys, optionally recursively, with
|
578
|
+
## case-insensitive comparison and stripping of double quotes.
|
579
|
+
## ============================================================ #
|
580
|
+
def sort_by_key(hash, recursive = false, &block)
  ##============================================================##
  ## Returns a new hash whose keys are ordered with the given
  ## comparison block. Without a block, keys are compared as
  ## lower-cased strings with double quotes removed.
  ## When +recursive+ is truthy, nested hashes are sorted too.
  ##============================================================##
  normalize = ->(k) { k.to_s.downcase.gsub(DOUBLE_QUOTE, "") }
  block   ||= proc {|left, right| normalize.call(left) <=> normalize.call(right) }

  sorted = {}
  hash.keys.sort(&block).each do |key|
    child       = hash[key]
    sorted[key] = recursive && child.is_a?(Hash) ? sort_by_key(child, true, &block) : child
  end
  sorted
end
|
587
|
+
|
588
|
+
##============================================================##
|
589
|
+
## parse_xml Function
|
590
|
+
## Purpose: Parse a YAML file into a nested hash representation.
|
591
|
+
##
|
592
|
+
## This method reads through the YAML file line by line and creates a
|
593
|
+
## nested hash representation based on the structure and content of the YAML.
|
594
|
+
##============================================================##
|
595
|
+
def parse_xml(file_path)
|
596
|
+
nested_hash = {}
|
597
|
+
inblock = nil
|
598
|
+
last_keys = []
|
599
|
+
|
600
|
+
##============================================================##
|
601
|
+
## We go over each line of the file to create a hash.
|
602
|
+
## We put the multiline blocks in an array to recover
|
603
|
+
## all the values and the formatting type then we will pass
|
604
|
+
## on each of these arrays subsequently to transform them
|
605
|
+
## in the corresponding string
|
606
|
+
##============================================================##
|
607
|
+
File.foreach(file_path) do |line|
|
608
|
+
##============================================================##
|
609
|
+
## Determine the indentation level of the line.
|
610
|
+
##============================================================##
|
611
|
+
indent_level = line[/\A */].size
|
612
|
+
|
613
|
+
##============================================================##
|
614
|
+
## Check for blank lines (which can be present within multi-line blocks)
|
615
|
+
##============================================================##
|
616
|
+
blank_line = line.gsub(NEWLINE, "").empty?
|
617
|
+
|
618
|
+
##============================================================##
|
619
|
+
## Split the line into key and value.
|
620
|
+
##============================================================##
|
621
|
+
split = line.strip.split(":", 2)
|
622
|
+
key = split[0].to_s.strip
|
623
|
+
inblock = nil if !inblock.nil? && !blank_line && indent_level <= inblock
|
624
|
+
|
625
|
+
|
626
|
+
##============================================================##
|
627
|
+
## Set the key level based on indentation
|
628
|
+
##============================================================##
|
629
|
+
last_keys = last_keys[0, (blank_line ? inblock + INDENT_SIZE : indent_level) / INDENT_SIZE]
|
630
|
+
|
631
|
+
##============================================================##
|
632
|
+
## If inside a multi-line block, append the line to the current key's value
|
633
|
+
##============================================================##
|
634
|
+
if !inblock.nil?
|
635
|
+
current_key = last_keys.last
|
636
|
+
parent_keys = last_keys[0..-2]
|
637
|
+
result = parent_keys.reduce(nested_hash) {|hash, k| hash[k] }
|
638
|
+
result[current_key][1] << line.strip
|
639
|
+
##============================================================##
|
640
|
+
## Handle multi-line key declarations.
|
641
|
+
## We no longer have the >
|
642
|
+
## because it is transformed in the clean_xml into |
|
643
|
+
##============================================================##
|
644
|
+
elsif line.gsub("#{key}:", "").strip.start_with?("|")
|
645
|
+
inblock = indent_level
|
646
|
+
block_type = line.gsub("#{key}:", "").strip
|
647
|
+
result = last_keys.reduce(nested_hash) {|hash, k| hash[k] }
|
648
|
+
result[key] = [block_type, []]
|
649
|
+
last_keys << key
|
650
|
+
##============================================================##
|
651
|
+
## Handle regular key-value pair declarations
|
652
|
+
##============================================================##
|
653
|
+
else
|
654
|
+
value = split[1].to_s.strip
|
655
|
+
result = last_keys.reduce(nested_hash) {|hash, k| hash[k] }
|
656
|
+
if value.empty?
|
657
|
+
result[key] = {}
|
658
|
+
last_keys << key
|
659
|
+
else
|
660
|
+
result[key] = value.strip == "null" ? nil : value
|
661
|
+
end
|
662
|
+
end
|
663
|
+
end
|
664
|
+
|
665
|
+
##============================================================##
|
666
|
+
## We go over each value then we process if it is a has
|
667
|
+
## | with final newline
|
668
|
+
## |4 with newline and indentation of 4
|
669
|
+
## |- without newline
|
670
|
+
## |4- without newline and indentation of 4
|
671
|
+
##============================================================##
|
672
|
+
deep_transform_values(nested_hash) do |value|
|
673
|
+
if value.is_a?(Array)
|
674
|
+
style_type = value[0]
|
675
|
+
indent_supp = style_type.scan(/\d+/).first&.to_i || 0
|
676
|
+
indent_supp = [indent_supp - INDENT_SIZE, 0].max
|
677
|
+
value[1] = value[1].map {|l| "#{SPACE * indent_supp}#{l}" }
|
678
|
+
text = value[1].join(NEWLINE)
|
679
|
+
modifier = style_type[-1]
|
680
|
+
|
681
|
+
case modifier
|
682
|
+
when "+"
|
683
|
+
text << NEWLINE unless text.end_with?(NEWLINE)
|
684
|
+
when "-"
|
685
|
+
text.chomp!
|
686
|
+
else
|
687
|
+
text << NEWLINE unless text.end_with?(NEWLINE)
|
688
|
+
end
|
689
|
+
text
|
690
|
+
else
|
691
|
+
value
|
692
|
+
end
|
693
|
+
end
|
694
|
+
end
|
695
|
+
|
696
|
+
|
697
|
+
end
|
698
|
+
end
|
data/lib/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: immosquare-yaml
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- IMMO SQUARE
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-09-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: iso-639
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.2.5
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.2.5
|
27
|
+
description: IMMOSQUARE-YAML is a lightweight and efficient YAML parser designed to
|
28
|
+
facilitate the handling of real estate data in YAML format, offering streamlined
|
29
|
+
processes and a simplified user experience.
|
30
|
+
email:
|
31
|
+
- jules@immosquare.com
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- lib/immosquare-yaml.rb
|
37
|
+
- lib/version.rb
|
38
|
+
homepage: https://github.com/IMMOSQUARE/immosquare-yaml
|
39
|
+
licenses:
|
40
|
+
- MIT
|
41
|
+
metadata: {}
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 2.6.0
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
requirements: []
|
57
|
+
rubygems_version: 3.1.6
|
58
|
+
signing_key:
|
59
|
+
specification_version: 4
|
60
|
+
summary: A YAML parser tailored for real estate solutions.
|
61
|
+
test_files: []
|