zarby 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b77baffed4d87a5ea4f1b42a724ff83f437d2e8d8f5fee9188a826962797d954
4
- data.tar.gz: 1fdf369f8e76f06dab6785ee54400aec28e730d391f4631891a4e50e628fc762
3
+ metadata.gz: 656415d6b0a326bad3ad11708792f82c83296f341bb4c0d58389942abb8d44ed
4
+ data.tar.gz: 2f4d28eb627d2bee49cf8ceb4e9d500cc6ba968e83fe6a2f77b5c37b6fb193b8
5
5
  SHA512:
6
- metadata.gz: 4875f023186b998b527207b1e9c3b706c9c57eada03537050ba5f0d03e0f2f6fc273fe7c3622619fccfcf85e397ced4b236ef3327f07dabb2c01b66891ec9d01
7
- data.tar.gz: 462fc6605506572793327457a9c3b84f145da2fbdb90fd2769b74ee5233c28d778e9e651b46bb34e412abc38e09f5a3375f11fa119339046d535229c17c90eb2
6
+ metadata.gz: bea273ad274d1f0e5acc1bc084ec7d78e5d7c88789e4e7e8725fa3fefaff5969b424667a58f3e73af7aa43a9adedce7bc9f3509e9d57783701dac21f3f1e22e5
7
+ data.tar.gz: 62219550c9ab648a856018395edb45ffb4527085a543d29cba2970c839a81e529a0b33e150ba3b448c861d3c5af60d791903f0e076b201ec83c94b89496e21b4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,20 @@
1
+ # 0.1.5 / 2023-10-24
2
+
3
+ ## Enhancements
4
+
5
+ * Refactor the utf8 method of the Zarby::Normalize class
6
+ * Add documentation to the Zarby::Csv and Zarby::Normalize classes
7
+
8
+ # 0.1.4 / 2023-10-24
9
+
10
+ * Remove missing debug log
11
+
12
+ # 0.1.3 / 2023-10-24
13
+
14
+ ## Enhancements
15
+
16
+ * Decode ASCII-8BIT (actually windows)
17
+
1
18
  # 0.1.2 / 2023-10-20
2
19
 
3
20
  ## Enhancements
data/lib/zarby/csv.rb CHANGED
@@ -3,23 +3,30 @@
3
3
  module Zarby
4
4
  class NoColSepDetected < StandardError; end
5
5
 
6
+ # this class is used to detect the column separator in a CSV file
6
7
  class Csv
7
8
  COMMON_DELIMITERS = ['","', '";"', '":"', '"|"'].freeze
8
9
 
10
+ # @param [String] content
11
+ # @return [Csv]
9
12
  def initialize(content:)
10
13
  @content = content || ""
11
14
  end
12
15
 
16
+ # @param [String] content
17
+ # @return [String]
13
18
  def self.detect_separator(content)
14
19
  new(content: content).detect_separator
15
20
  end
16
21
 
22
+ # @return [String]
17
23
  def detect_separator
18
24
  valid? ? delimiters[0][0][1] : raise(Zarby::NoColSepDetected)
19
25
  end
20
26
 
21
27
  private
22
28
 
29
+ # @return [Boolean]
23
30
  def valid?
24
31
  !delimiters.collect(&:last).reduce(:+).zero?
25
32
  end
@@ -28,14 +35,17 @@ module Zarby
28
35
  # delimiters[0] #=> ["\";\"", 54]
29
36
  # delimiters[0][0] #=> "\";\""
30
37
  # delimiters[0][0][1] #=> ";"
38
+ # @return [Array<Array(String, Integer)>] [delimiter, count] pairs, most frequent first
31
39
  def delimiters
32
40
  @delimiters ||= COMMON_DELIMITERS.inject({}, &count).sort(&most_found)
33
41
  end
34
42
 
43
+ # @return [Proc]
35
44
  def most_found
36
45
  ->(a, b) { b[1] <=> a[1] }
37
46
  end
38
47
 
48
+ # @return [Proc]
39
49
  def count
40
50
  lambda { |hash, delimiter|
41
51
  hash[delimiter] = @content.count(delimiter)
@@ -1,48 +1,36 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zarby
4
+ # this class is used to normalize the input string to UTF-8
4
5
  class Normalize
5
6
  # utf-8 converting from the string's given encoding
6
- COMMON_ENCODINGS = %w[UTF-8 Windows-1252 ASCII-8BIT US-ASCII].freeze
7
+ COMMON_ENCODINGS = %w[UTF-8 Windows-1252 ASCII-8BIT ISO-8859-1 US-ASCII].freeze
7
8
 
9
+ # @param input [String]
10
+ # @return [Normalize]
8
11
  def initialize(input:)
9
- @input = input || ""
12
+ @input = input&.force_encoding(Encoding::UTF_8) || ''
10
13
  end
11
14
 
15
+ # @param input [String]
16
+ # @return [String]
12
17
  def self.utf8(input)
13
18
  new(input: input).utf8
14
19
  end
15
20
 
21
+ # @return [String]
16
22
  def utf8
17
23
  output = @input if valid?
18
- COMMON_ENCODINGS.each do |encoding|
19
- output ||= convert { @input.encode(encoding) }
20
- output ||= convert { @input.force_encoding('UTF-8') } if encoding == 'UTF-8'
21
- end
22
24
 
23
- output ||= unpack_pack { @input.unpack("C*").pack("U*") } if output.nil?
24
25
 
25
- # replace any unknown characters with a placeholder: �
26
- output ||= convert { @input.encode('UTF-8', invalid: :replace, undef: :replace) }
27
- output
28
- end
29
-
30
- private
31
-
32
- def convert
33
- string = yield
34
- string if string.valid_encoding?
26
+ output ||= @input.force_encoding(Encoding::ISO_8859_1).encode!(Encoding::UTF_8)
35
27
  rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
36
28
  nil
37
29
  end
38
30
 
39
- def unpack_pack
40
- string = yield
41
- string if string.valid_encoding?
42
- rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
43
- nil
44
- end
31
+ private
45
32
 
33
+ # @return [Boolean]
46
34
  def valid?
47
35
  @input.encoding.name == 'UTF-8' && @input.valid_encoding?
48
36
  end
data/lib/zarby/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zarby
4
- VERSION = "0.1.4"
4
+ VERSION = "0.1.6"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zarby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - vianney.sonneville
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-24 00:00:00.000000000 Z
11
+ date: 2023-11-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: