ldif_parser 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 86f7b2757e53b3856dee02c28b75d3007ee67efcb016ad6186392d1b286a9015
4
- data.tar.gz: 9e34979e64fbd4c6ba5ea5e08762941c4b136825312a9746d4ee4521cd46b381
3
+ metadata.gz: 39d7e193974194956a141adaad500d69571ba7932d6ae08d73d58f8d7d770f52
4
+ data.tar.gz: 6c364a27f19e583a6032e3d97f16df493b9e4f89c60fed724702ac29f3f63ab6
5
5
  SHA512:
6
- metadata.gz: 48ab36890e5d8aaa380d809f3f1e8a812f46fc5f01a455c926a21ee4de6746ed17f5c6d37b4649f35170abf46eee798973c0424d27fc6d585ddf6d6b6f39d4fe
7
- data.tar.gz: 04fc17e10e1d27487d228d96307acece582f74cb86cdd9ba1159a3c68f026146af5d9ff0beb2f3ff0ef6911be5e09e287881d2475686cbc5c8c064402d805667
6
+ metadata.gz: dde3733aaa120fadffb2fd28f47459a5997d4e5e7295b5664dd6a986a2d18ad3d34d7609f4436c276d6b217abe6b480b4fdb5fed50a79cdac7f3d797adcc7db2
7
+ data.tar.gz: 3197b571025c9121f6e8c1d4fd989711156de14c408577cde9a585f400f0926dd7b84f45a9cb5c486829f975941de0de13bd3318ff63845b54b912578611bd22
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ ruby '>= 3.1'
6
+
7
+ group :development do
8
+ gem 'benchmark-ips'
9
+ gem 'rubocop'
10
+ end
data/README.md CHANGED
@@ -5,10 +5,8 @@
5
5
  require 'ldif_parser'
6
6
 
7
7
  ldif_path = '/tmp/ldap.bak'
8
- result = LdifParser.parse_file(ldif_path, only: %w[displayName givenName sn mail])
9
-
10
- result.each do |res|
11
- p res
8
+ LdifParser.open(ldif_path, only: %w[displayName givenName sn mail]).each do |entry|
9
+ p entry
12
10
  end
13
11
  ```
14
12
 
@@ -20,9 +18,7 @@ $ gem install ldif_parser
20
18
 
21
19
  ## Usage
22
20
 
23
- In the example `result` is an `Array[Hash[Symbol, Array[String]]]`.
24
-
25
- Each entry has default a default `Array` value.
21
+ Use the `each` method to iterate over the LDIF entries and process each one as needed.
26
22
 
27
23
  If the ldif file is too large, you can optimize the processing of the file with the `only` and `except` options.
28
24
 
@@ -32,4 +28,16 @@ These options allow you to limit the number of lines in the ldif file that will
32
28
 
33
29
  - minimized: return attributes that have a single value as a plain string instead of a one-element array
34
30
  - only: captures only the specified ldap attributes
35
- - except: ignore the specified ldap attributes
31
+ - except: ignore the specified ldap attributes
32
+
33
+ ## Speed test
34
+
35
+ ````bash
36
+ $ time ruby test/speed.rb
37
+ ruby -v => 3.1.6p260
38
+ wc -l $path => 1000000
39
+
40
+ real 0m4,562s
41
+ user 0m4,457s
42
+ sys 0m0,091s
43
+ ````
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'entry_maker'
4
+ require 'base64'
5
+
6
class LdifParser
  # Reads LDIF records one at a time from an IO-like object and hands the raw
  # text of each record to EntryMaker.
  #
  # Records are runs of non-blank lines separated by one or more blank lines.
  class EntriesMaker
    # A line that is empty or contains only whitespace separates two records.
    R_INPUT_ENTRIES_SEPARATOR = /^\s*$/
    # Every real record carries a `dn:` (or base64 `dn::`) line.
    R_DN_LINE = /\Adn:/i
    # LDIF comment lines start with this character (RFC 2849).
    COMMENT_PREFIX = '#'

    class << self
      # Parses the next record available on +input+.
      #
      # Blocks of lines that contain no `dn:` line (for example a leading
      # `version: 1` header) are skipped, because EntryMaker cannot build an
      # entry from them.
      #
      # @param input [#eof?, #readline] stream positioned anywhere between records
      # @param options [Hash] keyword options forwarded to EntryMaker.new
      # @return [Object, nil] whatever EntryMaker#make returns, or nil once the
      #   input is exhausted
      def parse(input, options = {})
        loop do
          content = next_block(input)
          return if content.nil?
          next unless content.any? { |line| line.match?(R_DN_LINE) }

          return EntryMaker.new(content.join("\n"), **options).make
        end
      end

      private

      # Collects the next run of non-blank, non-comment lines.
      #
      # Leading separators are skipped instead of being required, so a file
      # that starts directly with a record does not lose it, and the trailing
      # separator is simply consumed, so the stream never has to be rewound.
      #
      # @return [Array<String>, nil] chomped lines, or nil at end of input
      def next_block(input)
        content = []

        until input.eof?
          line = input.readline

          if line.match?(R_INPUT_ENTRIES_SEPARATOR)
            break unless content.empty?

            next
          end

          next if line.start_with?(COMMENT_PREFIX)

          content << line.chomp
        end

        content.empty? ? nil : content
      end
    end
  end
end
data/lib/entry.rb ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
class LdifParser
  # One LDIF record: a Hash whose keys are attribute names as Symbols, in the
  # case they had in the source, and whose values are the attribute values.
  class Entry < Hash
    # @return [Object, nil] the value stored under +:dn+
    #
    # Read straight from the hash on every call so the result can never be
    # stale after +:dn+ is reassigned.
    def dn
      self[:dn]
    end

    # Case-insensitive attribute lookup.
    #
    # Keys keep their original case (e.g. +:displayName+), so an exact lookup
    # of the downcased name alone would miss them; the stored keys are
    # therefore compared case-insensitively when the exact key is absent.
    #
    # @param key [#to_s] attribute name in any case
    # @return [Object, nil] the stored value, or the hash default when no key
    #   matches
    def get(key)
      wanted = key.to_s.downcase
      exact = wanted.to_sym
      return self[exact] if key?(exact)

      actual = keys.find { |name| name.to_s.downcase == wanted }
      self[actual.nil? ? exact : actual]
    end
  end
end
data/lib/entry_maker.rb CHANGED
@@ -1,55 +1,44 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'hash_insensitive'
3
+ require_relative 'entry'
4
4
  require 'base64'
5
5
 
6
6
  class LdifParser
7
7
  class EntryMaker
8
- R_LINE_SPLIT = /(\w+)(:+)\s*(.*)/.freeze
8
+ R_LINE_SPLIT = /(\w+)(:+)\s*(.*)/
9
9
  BASE64_SEPARATOR = '::'
10
10
 
11
- class << self
12
- def call(str)
13
- new(str).make
14
- end
15
-
16
- def call_minimized(str)
17
- new(str).make_minimized
18
- end
19
- end
20
-
21
- def initialize(str)
11
+ def initialize(str, minimized: false, only_regexp: nil, except_regexp: nil)
22
12
  @str = str
23
13
  @str.gsub!(/\n\s+/, '')
24
14
  @str.gsub!(/\n+$/, '')
15
+
16
+ @minimized = minimized
17
+ @only_regexp = only_regexp
18
+ @except_regexp = except_regexp
25
19
  end
26
20
 
27
21
  def make
28
22
  hash = lines_decoded_to_h
29
- hash.extend(HashInsensitive)
30
- hash.default = []
31
- hash
32
- end
23
+ hash[:dn] = hash[:dn].first
33
24
 
34
- def make_minimized
35
- make.transform_values do |v|
36
- v.length == 1 ? v.first : v
25
+ if @minimized
26
+ hash.transform_values! do |v|
27
+ v.length == 1 ? v.first : v
28
+ end
37
29
  end
30
+
31
+ hash
38
32
  end
39
33
 
40
34
  private
41
35
 
42
36
  def lines_decoded_to_h
43
- lines_decoded.each_with_object({}) do |(k, v), h|
44
- init_hash(h, k)
45
- h[k].push(v)
37
+ lines_decoded.each_with_object(Entry.new) do |(k, v), h|
38
+ (h[k] ||= []) << v
46
39
  end
47
40
  end
48
41
 
49
- def init_hash(h, k)
50
- h[k] ||= []
51
- end
52
-
53
42
  def lines_decoded
54
43
  lines.map do |line|
55
44
  line_decoder(line)
@@ -57,15 +46,20 @@ class LdifParser
57
46
  end
58
47
 
59
48
  def line_decoder(line)
60
- parts = line.scan(R_LINE_SPLIT).first
61
- parts[0] = parts[0].to_sym
62
- parts[2] = Base64.decode64(parts[2]).force_encoding('UTF-8') if parts[1] == BASE64_SEPARATOR
63
- parts.delete_at(1)
64
- parts
49
+ key, separator, value = line.scan(R_LINE_SPLIT).first
50
+
51
+ if separator == BASE64_SEPARATOR
52
+ [key.to_sym, Base64.decode64(value).force_encoding('UTF-8')]
53
+ else
54
+ [key.to_sym, value]
55
+ end
65
56
  end
66
57
 
67
58
  def lines
68
- @str.split(/\n/)
59
+ arr = @str.split(/\n/)
60
+ arr.select! { |line| line.match?(@only_regexp) } unless @only_regexp.nil?
61
+ arr.reject! { |line| line.match?(@except_regexp) } unless @except_regexp.nil?
62
+ arr
69
63
  end
70
64
  end
71
65
  end
data/lib/ldif_parser.rb CHANGED
@@ -1,95 +1,94 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'entry_maker'
3
+ require_relative 'entries_maker'
4
4
  require_relative 'version'
5
+ require 'stringio'
5
6
 
6
7
  class LdifParser
7
- NEW_LDIF_OBJECT_PATTERN = 'dn'
8
- SPACE = ' '
9
- DOUBLE_POINT = ':'
10
-
11
8
  class << self
12
- def parse_file(ldif_path, minimized: false, only: [], except: [])
13
- parse(IO.read(ldif_path), minimized: minimized, only: only, except: except)
9
+ def open(path, minimized: false, only: [], except: [])
10
+ options = {
11
+ minimized: minimized,
12
+ only_regexp: build_regexp(only),
13
+ except_regexp: build_regexp(except)
14
+ }.freeze
15
+
16
+ input = File.open(File.expand_path(path))
17
+
18
+ new(input, options).tap do |parser|
19
+ if block_given?
20
+ yield parser
21
+
22
+ parser.close
23
+ else
24
+ ObjectSpace.define_finalizer parser, finalizer(input)
25
+ end
26
+ end
14
27
  end
15
28
 
16
- def parse(str, minimized: false, only: [], except: [])
17
- included_pattern(only)
18
- excluded_pattern(except)
19
- parser = new(str)
20
- minimized ? parser.parse_minimized! : parser.parse!
21
- end
29
+ private
22
30
 
23
- def included_pattern(patterns)
24
- @included_patterns = patterns.map(&:downcase)
25
- end
31
+ def build_regexp(keys)
32
+ return nil if keys.empty?
26
33
 
27
- def excluded_pattern(patterns)
28
- @excluded_patterns = patterns.map(&:downcase)
34
+ or_map = keys.map { |key| "#{key}:" }
35
+ Regexp.new("^(#{or_map.join('|')})", Regexp::IGNORECASE)
29
36
  end
30
37
 
31
- attr_reader :included_patterns, :excluded_patterns
38
+ def finalizer(io)
39
+ proc { io.close }
40
+ end
32
41
  end
33
42
 
34
- def initialize(str)
35
- @str = str
36
- end
43
+ attr_reader :input
37
44
 
38
- def parse!
39
- str_parts.map do |str|
40
- EntryMaker.call(str)
41
- end
42
- end
45
+ def initialize(what, options = {})
46
+ @input = if what.respond_to? :to_io
47
+ what.to_io
48
+ elsif what.is_a? String
49
+ StringIO.new(what)
50
+ else
51
+ raise ArgumentError, 'I do not know what to do.'
52
+ end
43
53
 
44
- def parse_minimized!
45
- str_parts.map do |str|
46
- EntryMaker.call_minimized(str)
47
- end
54
+ @options = options
48
55
  end
49
56
 
50
- def str_parts
51
- parts = []
52
- str = ''.dup
53
- previous_key = nil
54
-
55
- @str.each_line do |line|
56
- line_key = get_line_key(line, previous_key)
57
- next if line_key.nil?
57
+ def close
58
+ @input.close
59
+ end
58
60
 
59
- if line_has_to_be_excluded?(line_key)
60
- previous_key = line_key
61
- next
62
- end
61
+ def lock
62
+ @input.flock File::LOCK_SH if @input.respond_to? :flock
63
63
 
64
- if new_ldif_object?(line_key, str, line)
65
- parts << str
66
- str = ''.dup
67
- end
64
+ return unless block_given?
68
65
 
69
- str << line
70
- previous_key = line_key
66
+ begin
67
+ yield self
68
+ ensure
69
+ unlock
71
70
  end
72
-
73
- parts << str
74
-
75
- @str = nil
76
-
77
- parts
78
71
  end
79
72
 
80
- def new_ldif_object?(line_key, str, line)
81
- line_key == NEW_LDIF_OBJECT_PATTERN && !str.empty? && !line.start_with?(SPACE)
73
+ def unlock
74
+ return unless @input.respond_to? :flock
75
+
76
+ @input.flock File::LOCK_UN
82
77
  end
83
78
 
84
- def get_line_key(line, previous_key)
85
- return previous_key if line.start_with?(SPACE)
79
+ def each(&block)
80
+ @input.seek 0
86
81
 
87
- line.split(DOUBLE_POINT).first&.downcase || previous_key
82
+ lock do
83
+ each_no_lock(&block)
84
+ end
88
85
  end
89
86
 
90
- def line_has_to_be_excluded?(line_key)
91
- return false if line_key == NEW_LDIF_OBJECT_PATTERN
87
+ private
92
88
 
93
- self.class.excluded_patterns.include?(line_key) || (!self.class.included_patterns.empty? && !self.class.included_patterns.include?(line_key))
89
+ def each_no_lock
90
+ while (entry = EntriesMaker.parse(@input, @options))
91
+ yield entry
92
+ end
94
93
  end
95
94
  end
data/lib/version.rb CHANGED
@@ -1,15 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class LdifParser
4
- def self.gem_version
5
- Gem::Version.new VERSION::STRING
6
- end
7
-
8
- module VERSION
9
- MAJOR = 0
10
- MINOR = 5
11
- TINY = 0
12
-
13
- STRING = [MAJOR, MINOR, TINY].compact.join('.')
4
+ class << self
5
+ def version
6
+ File.read(File.expand_path('../VERSION', __dir__)).strip
7
+ end
8
+ alias gem_version version
14
9
  end
15
10
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ldif_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Désécot
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-17 00:00:00.000000000 Z
11
+ date: 2025-03-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Simple class to parse ldif file
14
14
  email:
@@ -17,17 +17,18 @@ executables: []
17
17
  extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
+ - Gemfile
20
21
  - LICENSE
21
22
  - README.md
23
+ - VERSION
24
+ - lib/entries_maker.rb
25
+ - lib/entry.rb
22
26
  - lib/entry_maker.rb
23
- - lib/hash_insensitive.rb
24
27
  - lib/ldif_parser.rb
25
28
  - lib/version.rb
26
- - sig/entry_maker.rbs
27
- - sig/ldif_parser.rbs
28
29
  homepage: https://github.com/RaoH37/ldif_parser
29
30
  licenses:
30
- - GPL-3.0
31
+ - GPL-3.0-only
31
32
  metadata:
32
33
  homepage_uri: https://github.com/RaoH37/ldif_parser
33
34
  source_code_uri: https://github.com/RaoH37/ldif_parser
@@ -40,14 +41,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
40
41
  requirements:
41
42
  - - ">="
42
43
  - !ruby/object:Gem::Version
43
- version: '2.7'
44
+ version: '3.1'
44
45
  required_rubygems_version: !ruby/object:Gem::Requirement
45
46
  requirements:
46
47
  - - ">="
47
48
  - !ruby/object:Gem::Version
48
- version: '0'
49
+ version: 1.8.11
49
50
  requirements: []
50
- rubygems_version: 3.4.19
51
+ rubygems_version: 3.3.27
51
52
  signing_key:
52
53
  specification_version: 4
53
54
  summary: LDIF parser
@@ -1,31 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module HashInsensitive
4
- def dn
5
- @dn ||= self['dn'].first
6
- end
7
-
8
- def insensitive_origin_keys
9
- keys.select { |key| key.respond_to?(:to_s) }
10
- end
11
-
12
- def insensitive_keys
13
- insensitive_origin_keys.map(&:to_s).map(&:downcase)
14
- end
15
-
16
- def default=(obj)
17
- @default = obj
18
- end
19
-
20
- def default(key)
21
- return @default unless key.respond_to?(:to_s)
22
-
23
- tmp_key = key.to_s.downcase
24
- tmp_index = insensitive_keys.index(tmp_key)
25
- return @default if tmp_index.nil?
26
-
27
- insensitive_origin_key = insensitive_origin_keys.at(tmp_index)
28
-
29
- self[insensitive_origin_key]
30
- end
31
- end
data/sig/entry_maker.rbs DELETED
@@ -1,14 +0,0 @@
1
- class LdifParser
2
- class EntryMaker
3
- def self.call: (String str) -> Hash[Symbol, Array[String]]
4
- def self.call_minimized: (String str) -> Hash[Symbol, (String | Array[String])]
5
-
6
- def initialize: (String str) -> void
7
- def make: -> Hash[Symbol, Array[String]]
8
- def make_minimized: -> Hash[Symbol, String | Array[String]]
9
- def lines_decoded_to_h: -> Hash[Symbol, Array[String]]
10
- def lines_decoded: -> Array[Array[String]]
11
- def line_decoder: (String line) -> Array[String]
12
- def lines: -> Array[String]
13
- end
14
- end
data/sig/ldif_parser.rbs DELETED
@@ -1,13 +0,0 @@
1
- class LdifParser
2
- def self.parse_file: (String ldif_path, minimized: bool, only: Array[String], except: Array[String]) -> Array[Hash[Symbol, (String | Array[String])]]
3
- def self.parse: (String str, minimized: bool, only: Array[String], except: Array[String]) -> Array[Hash[Symbol, (String | Array[String])]]
4
- def self.included_pattern: (Array[String] patterns) -> Array[String]
5
- def self.excluded_pattern: (Array[String] patterns) -> Array[String]
6
- attr_reader included_patterns: Array[String]
7
- attr_reader excluded_patterns: Array[String]
8
-
9
- def initialize: (String str) -> void
10
- def parse!: -> Array[Hash[Symbol, (String | Array[String])]]
11
- def str_parts: -> Array[String]
12
- def line_has_to_be_excluded?: (String line) -> (bool)
13
- end