immosquare-extensions 0.1.13 → 0.1.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/immosquare-extensions/file.rb +46 -2
- data/lib/immosquare-extensions/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9eac5ff3164cb73f659292ba3a389b9213f647e5c5c54f9da0c6535ec68ca573
|
4
|
+
data.tar.gz: e1bc371e64193f0d4750298aa0c5e18a88827caa5c2d4826885a8caa530f4ccd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 734f2700c95e499f2a4fa35f7b7b7fc80f81aac58d5480332db03490919b5cec8fad77355cc9d5014c21b613144eca10981818422420fe64a54dc4ff5bb210ef
|
7
|
+
data.tar.gz: a8076f8d899a9fa2bb6b54b09e15c2b6674f59141fc1d4d17e90e481f9de7c362fef9ffa5ebc0b9d7b7c55aeebc28a5ee338c0dad1d5775cfceb7d87a7304297
|
@@ -14,6 +14,17 @@ class File
|
|
14
14
|
## The total number of lines in the normalized file.
|
15
15
|
##===========================================================================##
|
16
16
|
def self.normalize_last_line(file_path)
|
17
|
+
##============================================================##
|
18
|
+
## Get the file size
|
19
|
+
##============================================================##
|
20
|
+
file_size = File.size?(file_path)
|
21
|
+
|
22
|
+
##============================================================##
|
23
|
+
## If the file is missing or empty (0 bytes), there's nothing to normalize.
|
24
|
+
##============================================================##
|
25
|
+
return 0 if file_size.nil? || file_size == 0
|
26
|
+
|
27
|
+
|
17
28
|
end_of_line = $INPUT_RECORD_SEPARATOR || "\n"
|
18
29
|
##============================================================##
|
19
30
|
## Read all lines from the file
|
@@ -22,8 +33,41 @@ class File
|
|
22
33
|
## Read the content of the file with the detected encoding,
|
23
34
|
## falling back to UTF-8 if the detected encoding is empty or invalid.
|
24
35
|
##============================================================##
|
25
|
-
detected_encoding
|
26
|
-
|
36
|
+
detected_encoding = `uchardet #{file_path}`.strip
|
37
|
+
encoding_whitelist = [
|
38
|
+
"UTF-8", # Encodage universel pour texte avec ou sans accents
|
39
|
+
"Windows-1252", # Utilisé couramment pour les langues occidentales
|
40
|
+
"ISO-8859-1", # L'encodage Latin-1, très utilisé en Europe Occidentale
|
41
|
+
"Windows-1250", # Europe Centrale et Orientale
|
42
|
+
"ISO-8859-2", # Pour les langues d'Europe Centrale
|
43
|
+
"Windows-1251", # Cyrillic; utilisé pour le russe, bulgare, serbe cyrillique
|
44
|
+
"KOI8-R", # Russe
|
45
|
+
"ISO-8859-5", # Encodage cyrillique
|
46
|
+
"ISO-8859-7", # Grec
|
47
|
+
"Windows-1253", # Grec
|
48
|
+
"ISO-8859-9", # Turc
|
49
|
+
"Windows-1254", # Turc
|
50
|
+
"ISO-8859-15", # Variante de l'ISO-8859-1 qui couvre plus de caractères
|
51
|
+
"Windows-1256", # Arabe
|
52
|
+
"ISO-8859-6", # Arabe
|
53
|
+
"Windows-1255", # Hébreu
|
54
|
+
"ISO-8859-8", # Hébreu
|
55
|
+
"Big5", # Chinois traditionnel
|
56
|
+
"GB2312", # Chinois simplifié
|
57
|
+
"Shift_JIS", # Japonais
|
58
|
+
"EUC-JP", # Japonais
|
59
|
+
"EUC-KR", # Coréen
|
60
|
+
"ISO-2022-JP", # Encodage pour le courrier électronique en japonais
|
61
|
+
"ISO-2022-KR", # Coréen
|
62
|
+
"ISO-2022-CN", # Chinois
|
63
|
+
"UTF-16LE", # UTF-16 Little Endian
|
64
|
+
"UTF-16BE", # UTF-16 Big Endian
|
65
|
+
"UTF-32LE", # UTF-32 Little Endian
|
66
|
+
"UTF-32BE" # UTF-32 Big Endian
|
67
|
+
]
|
68
|
+
|
69
|
+
|
70
|
+
encoding_to_use = detected_encoding.empty? || !encoding_whitelist.include?(detected_encoding) ? "UTF-8" : "#{detected_encoding}:UTF-8"
|
27
71
|
content = File.read(file_path, :encoding => encoding_to_use)
|
28
72
|
|
29
73
|
##===========================================================================##
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: immosquare-extensions
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.13
|
4
|
+
version: 0.1.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- IMMO SQUARE
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode_utils
|