regex_generator 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aba286d39cd71e85e8393e0aff3f9f4d1872650b4dad12063a7ce84d6d8b2966
4
- data.tar.gz: e291cbac8deb7bcaca1889ed16740a424b711e9e150b2e9136401da2bd3ff608
3
+ metadata.gz: 1ef2c0d2c4fb493acc32581875db3cfae61249d5e447d5f2d0a146bb4ef5de2f
4
+ data.tar.gz: 312b3c3762c88421675312ea32a663532892bb2e52352ace680242ac90c9bec7
5
5
  SHA512:
6
- metadata.gz: 11f42b7a3e9043d23e99069f2d6cecf3611b1748f77acfa88c037ab621e4c319f73a958b084e87b3174eaa2732897bdf901e4c5cca096069bd09cf71eff219ee
7
- data.tar.gz: 379e3baa608f1ae95c600114944b982c98bc3c5928ce6f408fb37a10ef66c6827aca06823a2804c0ec6a2476c221279e846b22d1c8e8e8f66fcf47c9c2122367
6
+ metadata.gz: ecf144cf3b49ee96f9eeb1c12808e204dec22c7ef996382097c4e0f3e361f7d3945e17fdabe049903ce931c59238c975394d850916dbdbad26e227dc86ec6fb6
7
+ data.tar.gz: 1d640a4be4e4c8ca58d1061d4a05d7188492893185e8dcfc8c5d36ce93a5c2e8a3286e864bbfe0fd45ac1e8c6813dbaea23ea2a3e47529dd38d21af1bf845822
data/README.md CHANGED
@@ -40,6 +40,9 @@ Allowed options:
40
40
  - `:self_recognition` - Symbols that will be represented as themselves. Can be a string or an array
41
41
  - `:look` - `:ahead` or `:behind` (`:behind` by default). To generate regex with text after or before the value
42
42
  - `:strict_count` - When `true`, the regex will be generated with a strict chars count
43
+ - `:title` - Regex will be generated for the provided title. If `:title` is provided
44
+ as a Hash (i.e. to generate a regex with named capturing groups), `:title` must
45
+ contain the same keys as the target
43
46
 
44
47
  ## Contributing
45
48
 
data/bin/generate_regex CHANGED
@@ -19,10 +19,8 @@ OptionParser.new do |parser|
19
19
  exit
20
20
  end
21
21
 
22
- parser.on('--self_recognition=CHARS', 'Recognizes chargs as itself') do |chars|
23
- chars
24
- end
25
-
22
+ parser.on('--self_recognition=CHARS', 'Recognizes chargs as itself', &:chars)
23
+ parser.on('--title=TITLE', 'Generates regex for provided title', &:title)
26
24
  parser.on('--strict_count', 'Generates regex with a strcit chars count')
27
25
  parser.on('--exact_target', 'Generates regex with exact target value')
28
26
  end.parse!(into: params)
@@ -17,6 +17,7 @@ module RegexGenerator
17
17
  # or after the target
18
18
  # @option options [true, false] :strict_count to generate regex with a strict
19
19
  # chars count
20
+ # @option options [String, Hash] :title to generate regex for provided title
20
21
  # @return [Regexp]
21
22
  #
22
23
  # @example Generate regex
@@ -1,31 +1,56 @@
1
1
  module RegexGenerator
2
2
  class CharactersRecognizer
3
- PATTERNS = [/[A-Z]/, /[a-z]/, /\d/, /\n/, /\s/, /./].freeze
4
-
5
- # Creates array with regex representation for each char from the text
6
- #
7
- # @param text [String]
8
- # @param options [Hash] options to recognize regex patterns with
9
- # @option options [Array] :self_recognition to recognize chars as itself
10
- # @return [Array]
11
- def self.recognize(text, options = {})
12
- return [] unless text
13
-
14
- result = []
15
- text.chars.each do |char|
16
- PATTERNS.each do |pattern|
17
- next unless char[pattern]
18
-
19
- escaped_char = Regexp.escape(char)
20
- res_pattern = escaped_char
21
- res_pattern = pattern.source if (char.eql?(escaped_char) &&
22
- !options[:self_recognition]&.include?(escaped_char)) || char[/\s/]
23
-
24
- break result << res_pattern
3
+ class << self
4
+ PATTERNS = [/[A-Z]/, /[a-z]/, /\d/, /\n/, /\s/, /./].freeze
5
+
6
+ # Creates array with regex representation for each char from the text
7
+ #
8
+ # @param text [String]
9
+ # @param options [Hash] options to recognize regex patterns with
10
+ # @option options [Array] :self_recognition to recognize chars as itself
11
+ # @option options [String, Hash] :title to recognize all chars excluding
12
+ # title
13
+ # @return [Array]
14
+ def recognize(text, options = {})
15
+ return [] unless text
16
+
17
+ result = []
18
+
19
+ for_each_char(text, options) do |char, title|
20
+ next result << title if title
21
+
22
+ PATTERNS.each do |pattern|
23
+ next unless char[pattern]
24
+
25
+ escaped_char = Regexp.escape(char)
26
+ res_pattern = escaped_char
27
+ res_pattern = pattern.source if (char.eql?(escaped_char) &&
28
+ !options[:self_recognition]&.include?(escaped_char)) || char[/\s/]
29
+
30
+ break result << res_pattern
31
+ end
25
32
  end
33
+
34
+ result
26
35
  end
27
36
 
28
- result
37
+ private
38
+
39
+ # Executes the block for each char in the text excluding the titles
40
+ def for_each_char(text, options)
41
+ title_regex = if options[:title].kind_of?(Hash)
42
+ /(#{options[:title].escape.join('|')})/
43
+ else
44
+ /(#{options[:title]})/
45
+ end
46
+
47
+ text.split(title_regex).each do |txt|
48
+ next if txt.empty?
49
+ next yield(nil, txt) if txt[title_regex].eql?(txt)
50
+
51
+ txt.chars.each { |char| yield char, nil }
52
+ end
53
+ end
29
54
  end
30
55
  end
31
56
  end
@@ -5,6 +5,12 @@ module RegexGenerator
5
5
  end
6
6
  end
7
7
 
8
+ class TitleNotFoundError < StandardError
9
+ def message
10
+ 'The title was not found in the provided text'
11
+ end
12
+ end
13
+
8
14
  class InvalidOption < StandardError
9
15
  def initialize(*options)
10
16
  @options = options
@@ -12,18 +12,28 @@ module RegexGenerator
12
12
  # or after the target
13
13
  # @option options [true, false] :strict_count to generate regex with a
14
14
  # strict chars count
15
+ # @option options [String, Hash] :title to generate regex for provided title
15
16
  def initialize(target, text, options = {})
16
17
  @text = text
17
18
  @target = RegexGenerator::Target.new(target)
19
+ @title = RegexGenerator::Target.new(options[:title])
20
+ if options[:title] && !@title.keys_equal?(@target)
21
+ raise RegexGenerator::InvalidOption, :title
22
+ end
23
+
24
+ @title_str = @title.to_s
18
25
  @target_str = @target.to_s
19
26
  @options = options
20
27
  end
21
28
 
22
29
  # @return [Regexp]
23
30
  # @raise [TargetNotFoundError] if target text was not found in the text
24
- # @raise [InvalidOption] if :look option is not :ahead or :behind
31
+ # @raise [InvalidOption] if :look option is not :ahead or :behind or :title
32
+ # has different keys than target keys
33
+ # @raise [TitleNotFoundError] if :title was not found in the text
25
34
  def generate
26
35
  raise RegexGenerator::TargetNotFoundError unless @target.present?(@text)
36
+ raise RegexGenerator::TitleNotFoundError unless @title.present?(@text)
27
37
 
28
38
  string_regex_chars = recognize_text(cut_nearest_text, options)
29
39
  string_patterns_array = slice_to_identicals(string_regex_chars)
@@ -51,8 +61,8 @@ module RegexGenerator
51
61
 
52
62
  def text_regex_for_string
53
63
  {
54
- behind: /[\w\W]*((?:\n|\A)[\w\W]+?)#{@target.escape}/,
55
- ahead: /#{@target.escape}([\w\W]+?(?:\n|\Z))/
64
+ behind: /[\w\W]*((?:\n|\A)[\w\W]*?#{@title_str}\s*)#{@target.escape}/,
65
+ ahead: /#{@target.escape}([\w\W]*?#{@title_str}(?:\n|\Z))/
56
66
  }[options[:look]]
57
67
  end
58
68
 
@@ -82,6 +92,8 @@ module RegexGenerator
82
92
 
83
93
  # Prepares options
84
94
  def options
95
+ @options[:title] = @title
96
+
85
97
  if @options[:self_recognition].kind_of? String
86
98
  @options[:self_recognition] = @options[:self_recognition].chars
87
99
  end
@@ -45,5 +45,15 @@ module RegexGenerator
45
45
  result[key] = Regexp.escape value
46
46
  end
47
47
  end
48
+
49
+ # Compares keys (for strings always true)
50
+ #
51
+ # @param other_target [Target]
52
+ # @ return [true, false]
53
+ def keys_equal?(other_target)
54
+ return false unless to_s.class.eql?(other_target.to_s.class)
55
+
56
+ kind_of?(Hash) ? to_s.keys.eql?(other_target.to_s.keys) : true
57
+ end
48
58
  end
49
59
  end
@@ -1,3 +1,3 @@
1
1
  module RegexGenerator
2
- VERSION = '0.3.4'
2
+ VERSION = '0.3.5'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regex_generator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - o.vykhor
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-04-16 00:00:00.000000000 Z
11
+ date: 2019-04-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler