zarby 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '094c48a2c426ddb854907fb342840e5360f227b63a272fd8680128573f08e28c'
4
- data.tar.gz: 3327de7fe14f73baab98e11cf3f7e8c87b2fa6ee2896ce1c877bcdef8ed0a9d9
3
+ metadata.gz: c981e3c42a22e8cc916f44d4f4cef1b2f64029a70dfdce1d9e07a0273a48e6d8
4
+ data.tar.gz: 3925cdb2dba86f5c9b754380753d9ac0607886099c2e21480d85de620e09d166
5
5
  SHA512:
6
- metadata.gz: 623c758ff2e50d52939b08f58b414c23297503ab596b4afa0b91f2075b123c42d75f61e0a7501d83ee010e4ce29d7b2c070194596b42d1a2262861b76cca1425
7
- data.tar.gz: 5764f9b41cb8f31fa3cb171e52fd4265c1295ebca59d73c89fbb563524cbbf12962511159dbdbea404feaaa29ed05f6c648fa247105426155acee5a3970e4b54
6
+ metadata.gz: d19251edbcf44a06d2ec737a1f9d062a1d5ed8856cea22da0327b09d137aefea1b8974477d5c6834c4f46b271cfab5acb23022dabcef2d6570ae38175b2ae53a
7
+ data.tar.gz: 46720efbdd8d8bedc2edc8b912b68e45f19d2cd876603ed3737cce9f06a7525771a676af9cc07e536bcbb557e638c07296d5fe4e4a3b804d7fd83acab89f3b94
data/CHANGELOG.md CHANGED
@@ -1,3 +1,20 @@
1
+ # 0.1.5 / 2023-10-24
2
+
3
+ ## Enhancements
4
+
5
+ * Refactor the utf8 method of the Zarby::Normalize class
6
+ * Add documentation to the Zarby::Csv and Zarby::Normalize classes
7
+
8
+ # 0.1.4 / 2023-10-24
9
+
10
+ * Remove leftover debug log
11
+
12
+ # 0.1.3 / 2023-10-24
13
+
14
+ ## Enhancements
15
+
16
+ * Decode ASCII-8BIT (actually windows)
17
+
1
18
  # 0.1.2 / 2023-10-20
2
19
 
3
20
  ## Enhancements
data/lib/zarby/csv.rb CHANGED
@@ -3,23 +3,30 @@
3
3
  module Zarby
4
4
  class NoColSepDetected < StandardError; end
5
5
 
6
+ # this class is used to detect the column separator in a CSV file
6
7
  class Csv
7
8
  COMMON_DELIMITERS = ['","', '";"', '":"', '"|"'].freeze
8
9
 
10
+ # @param [String] content
11
+ # @return [Csv]
9
12
  def initialize(content:)
10
13
  @content = content || ""
11
14
  end
12
15
 
16
+ # @param [String] content
17
+ # @return [String]
13
18
  def self.detect_separator(content)
14
19
  new(content: content).detect_separator
15
20
  end
16
21
 
22
+ # @return [String]
17
23
  def detect_separator
18
24
  valid? ? delimiters[0][0][1] : raise(Zarby::NoColSepDetected)
19
25
  end
20
26
 
21
27
  private
22
28
 
29
+ # @return [Boolean]
23
30
  def valid?
24
31
  !delimiters.collect(&:last).reduce(:+).zero?
25
32
  end
@@ -28,14 +35,17 @@ module Zarby
28
35
  # delimiters[0] #=> ["\";\"", 54]
29
36
  # delimiters[0][0] #=> "\";\""
30
37
  # delimiters[0][0][1] #=> ";"
38
+ # @return [Array<Array<String, Integer>>]
31
39
  def delimiters
32
40
  @delimiters ||= COMMON_DELIMITERS.inject({}, &count).sort(&most_found)
33
41
  end
34
42
 
43
+ # @return [Proc]
35
44
  def most_found
36
45
  ->(a, b) { b[1] <=> a[1] }
37
46
  end
38
47
 
48
+ # @return [Proc]
39
49
  def count
40
50
  lambda { |hash, delimiter|
41
51
  hash[delimiter] = @content.count(delimiter)
@@ -1,50 +1,35 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zarby
4
+ # this class is used to normalize the input string to UTF-8
4
5
  class Normalize
5
6
  # utf-8 converting from the string's given encoding
6
- COMMON_ENCODINGS = %w[UTF-8 Windows-1252 ASCII-8BIT US-ASCII].freeze
7
+ COMMON_ENCODINGS = %w[UTF-8 Windows-1252 ASCII-8BIT ISO-8859-1 US-ASCII].freeze
7
8
 
9
+ # @param input [String]
10
+ # @return [Normalize]
8
11
  def initialize(input:)
9
- @input = input || ""
12
+ @input = input || ''
10
13
  end
11
14
 
15
+ # @param input [String]
16
+ # @return [String]
12
17
  def self.utf8(input)
13
18
  new(input: input).utf8
14
19
  end
15
20
 
21
+ # @return [String]
16
22
  def utf8
17
23
  output = @input if valid?
18
- COMMON_ENCODINGS.each do |encoding|
19
- output ||= convert { @input.encode(encoding) }
20
- output ||= convert { @input.force_encoding('UTF-8') } if encoding == 'UTF-8'
21
- puts output
22
- end
23
24
 
24
- output ||= unpack_pack { @input.unpack("C*").pack("U*") } if output.nil?
25
-
26
- # replace any unknown characters with a placeholder: �
27
- output ||= convert { @input.encode('UTF-8', invalid: :replace, undef: :replace) }
28
- puts output
29
- output
30
- end
31
-
32
- private
33
-
34
- def convert
35
- string = yield
36
- string if string.valid_encoding?
25
+ output ||= @input.force_encoding(Encoding::ISO_8859_1).encode!(Encoding::UTF_8)
37
26
  rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
38
27
  nil
39
28
  end
40
29
 
41
- def unpack_pack
42
- string = yield
43
- string if string.valid_encoding?
44
- rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
45
- nil
46
- end
30
+ private
47
31
 
32
+ # @return [Boolean]
48
33
  def valid?
49
34
  @input.encoding.name == 'UTF-8' && @input.valid_encoding?
50
35
  end
data/lib/zarby/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zarby
4
- VERSION = "0.1.3"
4
+ VERSION = "0.1.5"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zarby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - vianney.sonneville
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-23 00:00:00.000000000 Z
11
+ date: 2023-10-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: