immosquare-yaml 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/immosquare-yaml.rb +698 -0
- data/lib/version.rb +5 -0
- metadata +61 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c14b630cc0f5e121db85ea66b43d18c0d59ae69e2f99db80bdeaf7077b35c5ca
|
4
|
+
data.tar.gz: 06073e555050eb9d7971135fcdab6b6ec1d00f29f283c5ab68153c5306a0c8d9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: da0e7f956a8cf9e1f5a2bac7d03114961d2b2d2b31d41c40b54fe17d853102bd97012cca89d7425c80fd1b04498a847c10641e65690223ea34aec9fa5c1b5798
|
7
|
+
data.tar.gz: 463d134187834be1649dddf650615f440712282d07448b9254de0d57b4c8d64419091a114bc2c4d23630a4cb3c28408202ebc34e7d1b91a30cf6a7ef78e2975e
|
@@ -0,0 +1,698 @@
|
|
1
|
+
module ImmosquareYaml
|
2
|
+
class << self
|
3
|
+
|
4
|
+
##============================================================##
## Formatting primitives shared by the cleaner, parser and dumper
##============================================================##
INDENT_SIZE         = 2
SPACE               = " ".freeze
NEWLINE             = "\n".freeze
SIMPLE_QUOTE        = "'".freeze
DOUBLE_QUOTE        = '"'.freeze
DOUBLE_SIMPLE_QUOTE = "''".freeze

##============================================================##
## Typographic quotes (and a doubled simple quote) that are
## normalized to a plain simple quote when cleaning values
##============================================================##
WEIRD_QUOTES_REGEX  = /‘|’|“|”|‛|‚|„|‟|#{Regexp.quote(DOUBLE_SIMPLE_QUOTE)}/.freeze

##============================================================##
## A value starting with one of these characters must be quoted
##============================================================##
YML_SPECIAL_CHARS   = ["-", "`", "{", "}", "|", "[", "]", ">", ":", '"', "'", "*", "=", "%", ",", "!", "?", "&", "#", "@"].freeze

##============================================================##
## Boolean-like words (in their three usual casings) that must
## be quoted to stay strings
##============================================================##
RESERVED_KEYS       = (
  %w[yes no on off true false] +
  %w[Yes No On Off True False] +
  %w[YES NO ON OFF TRUE FALSE]
).freeze
|
17
|
+
|
18
|
+
##===========================================================================##
|
19
|
+
## This method cleans a specified YAML file by processing it line by line.
|
20
|
+
## It executes a comprehensive cleaning routine, which involves parsing the
|
21
|
+
## YAML content to a hash, optionally sorting it, and then dumping it back
|
22
|
+
## to a YAML format.
|
23
|
+
##
|
24
|
+
## Params:
|
25
|
+
## +file_path+:: Path to the YAML file that needs to be cleaned.
|
26
|
+
## +options+:: A hash of options where :sort controls whether the output should be sorted (default is true).
|
27
|
+
##
|
28
|
+
## Returns:
|
29
|
+
## Boolean indicating the success (true) or failure (false) of the operation.
|
30
|
+
##===========================================================================##
|
31
|
+
def clean(file_path, **options)
  ##============================================================##
  ## Cleans the YAML file at +file_path+ in place: the file is
  ## normalized line by line, parsed to a hash, optionally sorted
  ## and dumped back to the same path.
  ##
  ## Params:
  ## +file_path+:: Path to the YAML file to clean.
  ## +options+:: :sort (default true) controls whether keys are sorted.
  ##
  ## Returns:
  ## true on success, false on failure (the error message is
  ## printed to standard output).
  ##============================================================##
  options = {:sort => true}.merge(options)

  begin
    raise("File not found") if !File.exist?(file_path)

    clean_yml(file_path)

    ##============================================================##
    ## The :sort option is forwarded to parse: parse sorts by
    ## default, so calling it without the option made
    ## clean(path, :sort => false) still write a sorted file.
    ##============================================================##
    yaml_final = parse(file_path, :sort => options[:sort])

    ##============================================================##
    ## parse returns false (after printing its own error) when it
    ## fails; stop here instead of handing false to dump.
    ##============================================================##
    raise("Unable to parse #{file_path}") if yaml_final == false

    File.write(file_path, dump(yaml_final))
    true
  rescue StandardError => e
    puts(e.message)
    false
  end
end
|
56
|
+
|
57
|
+
##==========================================================================##
|
58
|
+
## This method parses a specified YAML file, carrying out a preliminary
|
59
|
+
## cleaning operation to ensure a smooth parsing process. Following this,
|
60
|
+
## the cleaned file is transformed into a hash, which can optionally be sorted.
|
61
|
+
## It operates under the assumption that the file is properly structured.
|
62
|
+
##
|
63
|
+
## Params:
|
64
|
+
## +file_path+:: Path to the YAML file that needs to be parsed.
|
65
|
+
## +options+:: A hash of options where :sort controls whether the output should be sorted (default is true).
|
66
|
+
##
|
67
|
+
## Returns:
|
68
|
+
## A hash representation of the YAML file or false if an error occurs.
|
69
|
+
##==========================================================================##
|
70
|
+
def parse(file_path, **options)
  ##============================================================##
  ## Cleans the file at +file_path+ in place, then reads it into a
  ## nested hash. The hash is sorted by key unless :sort is falsy.
  ## Any StandardError is printed and turned into a false return.
  ##============================================================##
  settings = {:sort => true}.merge(options)
  raise("File not found") unless File.exist?(file_path)

  clean_yml(file_path)
  parsed = parse_xml(file_path)
  settings[:sort] ? sort_by_key(parsed, settings[:sort]) : parsed
rescue StandardError => e
  puts(e.message)
  false
end
|
85
|
+
|
86
|
+
##===========================================================================##
|
87
|
+
## This method performs a dump operation to obtain a well-structured
|
88
|
+
## YAML file from a hash input. It iterates through each key-value pair in the
|
89
|
+
## hash and constructs a series of lines representing the YAML file, with
|
90
|
+
## appropriate indentations and handling of various value types including
|
91
|
+
## strings with newline characters.
|
92
|
+
##
|
93
|
+
## Params:
|
94
|
+
## +hash+:: The input hash to be converted into a YAML representation.
|
95
|
+
## +lines+:: An array to hold the constructed lines (default is an empty array).
|
96
|
+
## +indent+:: The current indentation level (default is 0).
|
97
|
+
##
|
98
|
+
## Returns:
|
99
|
+
## A string representing the YAML representation of the input hash.
|
100
|
+
##===========================================================================##
|
101
|
+
def dump(hash, lines = [], indent = 0)
  ##===========================================================================##
  ## Builds the YAML text for +hash+.
  ##
  ## Params:
  ## +hash+:: The hash to serialize.
  ## +lines+:: Accumulator of output lines; nested calls append to the same
  ##           array (it is mutated in place).
  ## +indent+:: Number of spaces used to indent the keys of +hash+.
  ##
  ## Returns:
  ## The YAML text, ending with a newline.
  ##===========================================================================##
  hash.each do |key, value|
    line = "#{SPACE * indent}#{clean_key(key)}:"

    case value
    when nil
      lines << "#{line} null"
    when String
      if value.include?(NEWLINE) || value.include?('\n')
        ##=============================================================##
        ## Multi-line string: emit a literal block header "|", followed
        ## by an indentation indicator when the text starts with spaces
        ## and by "-" when the text has no final newline.
        ##=============================================================##
        line += "#{SPACE}|"
        indent_level = value[/\A */].size
        line += (indent_level + INDENT_SIZE).to_s if indent_level > 0
        line += "-" if !value.end_with?(NEWLINE)
        lines << line

        ##=============================================================##
        ## Both real newlines and the two-character sequence \n
        ## separate the lines of the block.
        ##=============================================================##
        value.split(/\\n|\n/).each do |subline|
          lines << "#{SPACE * (indent + INDENT_SIZE)}#{subline}"
        end
      else
        lines << "#{line}#{SPACE}#{value}"
      end
    when Hash
      lines << line
      dump(value, lines, indent + INDENT_SIZE)
    else
      ##=============================================================##
      ## Any other value (Integer, Float, true/false, Symbol, Array...)
      ## is written using its string form. Without this branch the
      ## whole key was silently dropped from the output.
      ##=============================================================##
      lines << "#{line}#{SPACE}#{value}"
    end
  end

  ##===========================================================================##
  ## Terminate with a final newline and blank out whitespace-only lines.
  ##===========================================================================##
  lines += [""]
  lines = lines.map {|l| l.strip.empty? ? "" : l }
  lines.join("\n")
end
|
150
|
+
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
##===========================================================================##
|
155
|
+
## This method ensures the file ends with a single newline, facilitating
|
156
|
+
## cleaner multi-line blocks. It operates by reading all lines of the file,
|
157
|
+
## removing any empty lines at the end, and then appending a newline.
|
158
|
+
## This guarantees the presence of a newline at the end, and also prevents
|
159
|
+
## multiple newlines from being present at the end.
|
160
|
+
##
|
161
|
+
## Params:
|
162
|
+
## +file_path+:: The path to the file to be normalized.
|
163
|
+
##
|
164
|
+
## Returns:
|
165
|
+
## The total number of lines in the normalized file.
|
166
|
+
##===========================================================================##
|
167
|
+
def normalize_last_line(file_path)
  ##===========================================================================##
  ## Rewrites the file so that its content is followed by exactly one empty
  ## line: trailing blank lines are removed, the last content line is
  ## newline-terminated, and a lone newline is appended.
  ##
  ## Params:
  ## +file_path+:: The path to the file to normalize.
  ##
  ## Returns:
  ## The number of lines written, counting the final empty line.
  ##===========================================================================##
  lines = File.read(file_path).lines

  ##===========================================================================##
  ## Make sure the last line is newline-terminated. An empty file has no
  ## last line at all, so it is skipped instead of calling a method on nil.
  ##===========================================================================##
  lines[-1] = "#{lines[-1]}#{NEWLINE}" if lines.any? && !lines[-1].end_with?(NEWLINE)

  ##===========================================================================##
  ## Drop blank lines at the end, then add back a single empty line
  ##===========================================================================##
  lines.pop while lines.last && lines.last.strip.empty?
  lines += [NEWLINE]

  File.write(file_path, lines.join)
  lines.size
end
|
199
|
+
|
200
|
+
##============================================================##
|
201
|
+
## Deeply cleans the specified YAML file
|
202
|
+
##============================================================##
|
203
|
+
def clean_yml(file_path)
  ##===================================================================================#
  ## Rewrites the YAML file at +file_path+ in place, line by line: keys and
  ## values are cleaned, literal ("|") and folded (">") blocks are re-indented,
  ## quoted values spread over several lines are merged into one line, and keys
  ## without a value receive "null".
  ##
  ## Returns the result of File.write.
  ##===================================================================================#
  lines             = []
  inblock_indent    = nil
  weirdblock_indent = nil
  inblock           = false
  weirdblock        = false

  ##===================================================================================#
  ## Normalize the end of the file first; this also gives the total number of
  ## lines, used below to detect the last line.
  ##===================================================================================#
  line_count = normalize_last_line(file_path)

  ##===================================================================================#
  ## The line number comes from the enumerator so that it also advances on the
  ## blank lines skipped with `next`. With a counter incremented at the bottom
  ## of the loop, a skipped blank line froze the counter and the last line was
  ## never recognized.
  ##===================================================================================#
  File.foreach(file_path).with_index(1) do |current_line, line_index|
    last_line = line_index == line_count

    ##===================================================================================#
    ## Collapse runs of whitespace following a non-space character, then trim the end
    ##===================================================================================#
    current_line = current_line.to_s.gsub(/(?<=\S)\s+/, SPACE)
    current_line = current_line.rstrip

    ##===================================================================================#
    ## Blank lines are skipped unless we are inside a block or on the last line
    ##===================================================================================#
    blank_line = current_line.gsub(NEWLINE, "").empty?
    next if !(last_line || inblock || !blank_line)

    ##===================================================================================#
    ## Indentation of the current line and detection of block exits
    ##===================================================================================#
    last_inblock                  = inblock
    indent_level                  = current_line[/\A */].size
    need_to_clean_prev_inblock    = inblock == true && ((!blank_line && indent_level <= inblock_indent) || last_line)
    need_to_clean_prev_weirdblock = weirdblock == true && (indent_level <= weirdblock_indent || last_line)

    ##===================================================================================#
    ## Leaving a "|" / ">" block: collect the block lines already stored (walking
    ## back while the indentation stays the same) and rewrite them.
    ##===================================================================================#
    if need_to_clean_prev_inblock
      inblock      = false
      i            = -1
      block_indent = lines[i][/\A */].size
      block_lines  = [lines[i].lstrip]
      while lines[i][/\A */].size == lines[i - 1][/\A */].size
        block_lines << lines[i - 1].lstrip
        i -= 1
      end

      ##============================================================##
      ## lines[i - 1] is the header line ("key: |", "key: >-",
      ## "key: |4-", ...). A digit in the header is an explicit
      ## indentation; INDENT_SIZE is subtracted from it to get the
      ## extra indentation applied to the block lines.
      ##   ">"  => folded: the block is joined into one line and the
      ##           header is rewritten with "|"
      ##   else => literal: line breaks are kept
      ##============================================================##
      block_lines  = block_lines.reverse
      block_type   = lines[i - 1].split(": ").last
      indent_suppl = block_type.scan(/\d+/).first.to_i
      indent_suppl = indent_suppl > 0 ? indent_suppl - INDENT_SIZE : 0
      case block_type[0]
      when ">"
        lines[i - 1] = lines[i - 1].gsub(">", "|")
        lines[i]     = "#{SPACE * (block_indent + indent_suppl)}#{clean_value(block_lines.join(SPACE))}"
        ((i + 1)..-1).to_a.size.times { lines.pop }
      else
        split = clean_value(block_lines.join(NEWLINE), false).split(NEWLINE)
        (i..-1).each do |ii|
          lines[ii] = "#{SPACE * (block_indent + indent_suppl)}#{split.shift}"
        end
      end
    end

    ##===================================================================================#
    ## Leaving a 'weirdblock' (a quoted value written over several lines, already
    ## merged into the previous stored line): clean the merged value.
    ##===================================================================================#
    if need_to_clean_prev_weirdblock
      weirdblock = false
      key, value = lines[-1].split(":", 2)
      lines[-1]  = "#{key}: #{clean_value(value)}"
    end

    ##===================================================================================#
    ## A previous "key:" line that is not followed by a deeper line gets "null"
    ##===================================================================================#
    if inblock == false && weirdblock == false && lines[-1] && lines[-1].end_with?(":") && last_inblock == false
      prev_indent = lines[-1][/\A */].size
      lines[-1] += " null" if prev_indent >= indent_level
    end

    ##============================================================##
    ## Split into key and value on ":" (not ": ", because a bare
    ## key has no space after the colon)
    ##============================================================##
    split = inblock || weirdblock ? [current_line] : current_line.strip.split(":", 2)
    key   = inblock || weirdblock ? nil : split[0].to_s.strip

    if current_line.lstrip.start_with?("#")
      ##============================================================##
      ## Comment line: kept as is
      ##============================================================##
      lines << current_line.gsub(NEWLINE, "")
    elsif inblock == true
      ##============================================================##
      ## Inside a block: re-indent the line under the block header
      ##============================================================##
      current_line = current_line.gsub(NEWLINE, "").strip
      lines << "#{SPACE * (inblock_indent + INDENT_SIZE)}#{current_line}"
    elsif current_line.rstrip.match?(/\S+: [>|](\d*)[-+]?$/)
      ##============================================================##
      ## "key: |", "key: >", optionally followed by digits and -/+ :
      ## a block starts on the next line
      ##============================================================##
      lines << current_line.gsub(NEWLINE, "")
      inblock_indent = indent_level
      inblock        = true
    elsif split.size < 2
      ##============================================================##
      ## No colon: continuation of the previous value, merged inline
      ##============================================================##
      lines[-1] = (lines[-1] + " #{current_line.lstrip}").gsub(NEWLINE, "")
    else
      ##============================================================##
      ## Classic "key: value" or "key:" line
      ##============================================================##
      key          = clean_key(key)
      spaces       = (SPACE * indent_level).to_s
      current_line = "#{spaces}#{key}:"

      if !split[1].empty?
        value = split[1].to_s.strip

        ##============================================================##
        ## A value opening with a quote and holding an odd number of
        ## that quote continues on the following lines (weirdblock);
        ## it is cleaned once complete.
        ##============================================================##
        if (value.start_with?(DOUBLE_QUOTE) && value.count(DOUBLE_QUOTE).odd?) || (value.start_with?(SIMPLE_QUOTE) && value.count(SIMPLE_QUOTE).odd?)
          weirdblock        = true
          weirdblock_indent = indent_level
        else
          value = clean_value(split[1])
        end
        current_line += " #{value}"
      end

      lines << current_line
    end
  end

  ##============================================================##
  ## End with a newline, blank out whitespace-only lines and
  ## collapse repeated whitespace once more
  ##============================================================##
  lines += [""]
  lines = lines.map {|l| (l.strip.empty? ? "" : l).to_s.gsub(/(?<=\S)\s+/, SPACE) }
  File.write(file_path, lines.join(NEWLINE))
end
|
431
|
+
|
432
|
+
##============================================================##
|
433
|
+
## clean_key Function
|
434
|
+
## Purpose: Clean up and standardize YAML keys
|
435
|
+
##============================================================##
|
436
|
+
## Strategy:
|
437
|
+
## 1. Forcefully convert the key to a string to handle gsub operations, especially if it's an integer.
|
438
|
+
## 2. Check if the key is an integer.
|
439
|
+
## 3. Remove quotes if they are present.
|
440
|
+
## 4. Re-add quotes if the key is a reserved word or an integer.
|
441
|
+
#
|
442
|
+
## Regular Expression Explanation:
|
443
|
+
## /\A(['“‘”’"])(.*)\1\z/
|
444
|
+
## \A: Matches the start of the string, ensuring our pattern begins at the very start of the string.
|
445
|
+
## (['“‘”’"]): Captures a single quote character. It matches any of the characters specified within the brackets.
|
446
|
+
## This includes various types of single and double quotes.
|
447
|
+
## (.*) : Captures zero or more of any character. It "captures" the entirety of the string between the quotes.
|
448
|
+
## \1: Refers back to the first captured group, ensuring the same type of quote character is found at the end.
|
449
|
+
## \z: Matches the end of the string, ensuring our pattern matches up to the very end.
|
450
|
+
#
|
451
|
+
## In the second argument of gsub, we use '\2' to refer back to the content captured by the second capture group.
|
452
|
+
## This allows us to fetch the string without the surrounding quotes.
|
453
|
+
##============================================================##
|
454
|
+
def clean_key(key)
  ##============================================================##
  ## Returns the key as a string, without surrounding quotes,
  ## except when it is a reserved word or looks like an integer:
  ## it is then wrapped in double quotes.
  ##
  ## The surrounding quotes are removed first (same opening and
  ## closing character required), and the integer test is done on
  ## the unquoted text, so that 1, '1' and "1" all give "1" with
  ## double quotes. Testing before unquoting made a quoted integer
  ## key lose its quotes while a bare one gained them.
  ##============================================================##
  key = key.to_s.gsub(/\A(['“”‘’"]?)(.*)\1\z/, '\2')

  ##============================================================##
  ## Array#include? is used rather than Object#in?, which only
  ## exists when ActiveSupport is loaded.
  ##============================================================##
  needs_quotes = RESERVED_KEYS.include?(key) || key.match?(/\A[-+]?\d+\z/)
  needs_quotes ? "\"#{key}\"" : key
end
|
470
|
+
|
471
|
+
##============================================================##
|
472
|
+
## clean_value Function
|
473
|
+
## Purpose: Sanitize and standardize YAML values
|
474
|
+
## In YAML "inblock" scenarios, there's no need to add quotes
|
475
|
+
## around values as it's inherently handled.
|
476
|
+
## ============================================================ ##
|
477
|
+
def clean_value(value, with_quotes_verif = true)
  ##============================================================##
  ## Sanitizes a YAML value and returns it as a string.
  ##
  ## Params:
  ## +value+:: The raw value (converted with to_s).
  ## +with_quotes_verif+:: When exactly true, the result is wrapped
  ##   in double quotes if it needs them. Callers pass false for
  ##   block content. An empty or whitespace-only result is always
  ##   quoted.
  ##
  ## Only the Ruby standard library is used: the former
  ## value.present? and value.in?(...) calls exist only when
  ## ActiveSupport is loaded.
  ##============================================================##
  value = value.to_s

  ##============================================================##
  ## Drop one final newline first (before strip, for values that
  ## end with a space followed by a newline)
  ##============================================================##
  value = value[0..-2] if value.end_with?(NEWLINE)

  ##============================================================##
  ## Remove tabs, carriage returns, form feeds and vertical tabs
  ## (newlines are kept), squeeze repeated spaces, trim both ends
  ##============================================================##
  value = value.gsub(/[\t\r\f\v]+/, "")
  value = value.gsub(/ {2,}/, SPACE)
  value = value.strip

  ##============================================================##
  ## Typographic quotes become standard simple quotes, then one
  ## pair of surrounding quotes is removed (re-added below if
  ## needed)
  ##============================================================##
  value = value.gsub(WEIRD_QUOTES_REGEX, SIMPLE_QUOTE)
  value = value[1..-2] if (value.start_with?(DOUBLE_QUOTE) && value.end_with?(DOUBLE_QUOTE)) || (value.start_with?(SIMPLE_QUOTE) && value.end_with?(SIMPLE_QUOTE))

  ##============================================================##
  ## Convert \UXXXXXXXX escapes (8 hex digits) to the character
  ##============================================================##
  value = value.gsub(/\\U([0-9A-Fa-f]{8})/) { [::Regexp.last_match(1).to_i(16)].pack("U*") }

  ##=============================================================##
  ## Empty or whitespace-only value ("" or " "): always quoted
  ##=============================================================##
  return "\"#{value}\"" if value.match?(/\A[[:space:]]*\z/)
  return value if with_quotes_verif != true

  ##=============================================================##
  ## Quotes are required when the value:
  ##   contains ": "                    => key: text with: here
  ##   contains " #"                    => key: text with # here
  ##   contains a newline or \n         => key: Line 1\nLine 2
  ##   starts with a special character  => key: @text
  ##   ends with ":"                    => key: text:
  ##   is a reserved word               => key: YES
  ##   starts or ends with a space
  ##=============================================================##
  needs_quotes = value.include?(": ") ||
                 value.include?(" #") ||
                 value.include?(NEWLINE) ||
                 value.include?('\n') ||
                 value.start_with?(*YML_SPECIAL_CHARS) ||
                 value.end_with?(":") ||
                 RESERVED_KEYS.include?(value) ||
                 value.start_with?(SPACE) ||
                 value.end_with?(SPACE)

  needs_quotes ? "\"#{value}\"" : value
end
|
561
|
+
|
562
|
+
##============================================================##
|
563
|
+
## Deep transform values recursively
|
564
|
+
##============================================================##
|
565
|
+
def deep_transform_values(hash, &block)
  ##============================================================##
  ## Returns a new hash with the same keys where every value that
  ## is not a Hash is replaced by the result of the block; Hash
  ## values are processed recursively.
  ##============================================================##
  hash.transform_values do |entry|
    next deep_transform_values(entry, &block) if entry.is_a?(Hash)

    block.call(entry)
  end
end
|
574
|
+
|
575
|
+
##============================================================##
|
576
|
+
## sort_by_key Function
|
577
|
+
## Purpose: Sort a hash by its keys, optionally recursively, with
|
578
|
+
## case-insensitive comparison and stripping of double quotes.
|
579
|
+
## ============================================================ #
|
580
|
+
def sort_by_key(hash, recursive = false, &block)
  ##============================================================##
  ## Returns a new hash whose keys are ordered with the given
  ## comparison block. Without a block, keys are compared as
  ## downcased strings with double quotes removed. When
  ## +recursive+ is truthy, nested Hash values are sorted the
  ## same way.
  ##============================================================##
  block ||= proc {|a, b| a.to_s.downcase.gsub(DOUBLE_QUOTE, "") <=> b.to_s.downcase.gsub(DOUBLE_QUOTE, "") }

  ordered = {}
  hash.keys.sort(&block).each do |key|
    entry        = hash[key]
    entry        = sort_by_key(entry, true, &block) if recursive && entry.is_a?(Hash)
    ordered[key] = entry
  end
  ordered
end
|
587
|
+
|
588
|
+
##============================================================##
|
589
|
+
## parse_xml Function
|
590
|
+
## Purpose: Parse a YAML file into a nested hash representation (despite the method name, the input is YAML, not XML).
|
591
|
+
##
|
592
|
+
## This method reads through the YAML file line by line and creates a
|
593
|
+
## nested hash representation based on the structure and content of the YAML.
|
594
|
+
##============================================================##
|
595
|
+
def parse_xml(file_path)
|
596
|
+
nested_hash = {}
|
597
|
+
inblock = nil
|
598
|
+
last_keys = []
|
599
|
+
|
600
|
+
##============================================================##
|
601
|
+
## We go over each line of the file to create a hash.
|
602
|
+
## We put the multiline blocks in an array to recover
|
603
|
+
## all the values and the formatting type then we will pass
|
604
|
+
## on each of these arrays subsequently to transform them
|
605
|
+
## in the corresponding string
|
606
|
+
##============================================================##
|
607
|
+
File.foreach(file_path) do |line|
|
608
|
+
##============================================================##
|
609
|
+
## Determine the indentation level of the line.
|
610
|
+
##============================================================##
|
611
|
+
indent_level = line[/\A */].size
|
612
|
+
|
613
|
+
##============================================================##
|
614
|
+
## Check for blank lines (which can be present within multi-line blocks)
|
615
|
+
##============================================================##
|
616
|
+
blank_line = line.gsub(NEWLINE, "").empty?
|
617
|
+
|
618
|
+
##============================================================##
|
619
|
+
## Split the line into key and value.
|
620
|
+
##============================================================##
|
621
|
+
split = line.strip.split(":", 2)
|
622
|
+
key = split[0].to_s.strip
|
623
|
+
inblock = nil if !inblock.nil? && !blank_line && indent_level <= inblock
|
624
|
+
|
625
|
+
|
626
|
+
##============================================================##
|
627
|
+
## Set the key level based on indentation
|
628
|
+
##============================================================##
|
629
|
+
last_keys = last_keys[0, (blank_line ? inblock + INDENT_SIZE : indent_level) / INDENT_SIZE]
|
630
|
+
|
631
|
+
##============================================================##
|
632
|
+
## If inside a multi-line block, append the line to the current key's value
|
633
|
+
##============================================================##
|
634
|
+
if !inblock.nil?
|
635
|
+
current_key = last_keys.last
|
636
|
+
parent_keys = last_keys[0..-2]
|
637
|
+
result = parent_keys.reduce(nested_hash) {|hash, k| hash[k] }
|
638
|
+
result[current_key][1] << line.strip
|
639
|
+
##============================================================##
|
640
|
+
## Handle multi-line key declarations.
|
641
|
+
## We no longer have the >
|
642
|
+
## because it is transformed in the clean_xml into |
|
643
|
+
##============================================================##
|
644
|
+
elsif line.gsub("#{key}:", "").strip.start_with?("|")
|
645
|
+
inblock = indent_level
|
646
|
+
block_type = line.gsub("#{key}:", "").strip
|
647
|
+
result = last_keys.reduce(nested_hash) {|hash, k| hash[k] }
|
648
|
+
result[key] = [block_type, []]
|
649
|
+
last_keys << key
|
650
|
+
##============================================================##
|
651
|
+
## Handle regular key-value pair declarations
|
652
|
+
##============================================================##
|
653
|
+
else
|
654
|
+
value = split[1].to_s.strip
|
655
|
+
result = last_keys.reduce(nested_hash) {|hash, k| hash[k] }
|
656
|
+
if value.empty?
|
657
|
+
result[key] = {}
|
658
|
+
last_keys << key
|
659
|
+
else
|
660
|
+
result[key] = value.strip == "null" ? nil : value
|
661
|
+
end
|
662
|
+
end
|
663
|
+
end
|
664
|
+
|
665
|
+
##============================================================##
|
666
|
+
## We go over each value then we process if it is a has
|
667
|
+
## | with final newline
|
668
|
+
## |4 with newline and indentation of 4
|
669
|
+
## |- without newline
|
670
|
+
## |4- without newline and indentation of 4
|
671
|
+
##============================================================##
|
672
|
+
deep_transform_values(nested_hash) do |value|
|
673
|
+
if value.is_a?(Array)
|
674
|
+
style_type = value[0]
|
675
|
+
indent_supp = style_type.scan(/\d+/).first&.to_i || 0
|
676
|
+
indent_supp = [indent_supp - INDENT_SIZE, 0].max
|
677
|
+
value[1] = value[1].map {|l| "#{SPACE * indent_supp}#{l}" }
|
678
|
+
text = value[1].join(NEWLINE)
|
679
|
+
modifier = style_type[-1]
|
680
|
+
|
681
|
+
case modifier
|
682
|
+
when "+"
|
683
|
+
text << NEWLINE unless text.end_with?(NEWLINE)
|
684
|
+
when "-"
|
685
|
+
text.chomp!
|
686
|
+
else
|
687
|
+
text << NEWLINE unless text.end_with?(NEWLINE)
|
688
|
+
end
|
689
|
+
text
|
690
|
+
else
|
691
|
+
value
|
692
|
+
end
|
693
|
+
end
|
694
|
+
end
|
695
|
+
|
696
|
+
|
697
|
+
end
|
698
|
+
end
|
data/lib/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: immosquare-yaml
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- IMMO SQUARE
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-09-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: iso-639
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.2.5
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.2.5
|
27
|
+
description: IMMOSQUARE-YAML is a lightweight and efficient YAML parser designed to
|
28
|
+
facilitate the handling of real estate data in YAML format, offering streamlined
|
29
|
+
processes and a simplified user experience.
|
30
|
+
email:
|
31
|
+
- jules@immosquare.com
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- lib/immosquare-yaml.rb
|
37
|
+
- lib/version.rb
|
38
|
+
homepage: https://github.com/IMMOSQUARE/immosquare-yaml
|
39
|
+
licenses:
|
40
|
+
- MIT
|
41
|
+
metadata: {}
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options: []
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 2.6.0
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
requirements: []
|
57
|
+
rubygems_version: 3.1.6
|
58
|
+
signing_key:
|
59
|
+
specification_version: 4
|
60
|
+
summary: A YAML parser tailored for real estate solutions.
|
61
|
+
test_files: []
|