regex_generator 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/bin/generate_regex +2 -4
- data/lib/regex_generator.rb +1 -0
- data/lib/regex_generator/characters_recognizer.rb +48 -23
- data/lib/regex_generator/exceptions.rb +6 -0
- data/lib/regex_generator/generator.rb +15 -3
- data/lib/regex_generator/target.rb +10 -0
- data/lib/regex_generator/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1ef2c0d2c4fb493acc32581875db3cfae61249d5e447d5f2d0a146bb4ef5de2f
|
4
|
+
data.tar.gz: 312b3c3762c88421675312ea32a663532892bb2e52352ace680242ac90c9bec7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ecf144cf3b49ee96f9eeb1c12808e204dec22c7ef996382097c4e0f3e361f7d3945e17fdabe049903ce931c59238c975394d850916dbdbad26e227dc86ec6fb6
|
7
|
+
data.tar.gz: 1d640a4be4e4c8ca58d1061d4a05d7188492893185e8dcfc8c5d36ce93a5c2e8a3286e864bbfe0fd45ac1e8c6813dbaea23ea2a3e47529dd38d21af1bf845822
|
data/README.md
CHANGED
@@ -40,6 +40,9 @@ Allowed options:
|
|
40
40
|
- `:self_recognition` - Symbols that will be represented as itself. Can be string or array
|
41
41
|
- `:look` - `:ahead` or `:behind` (`:behind` by default). To generate regex with text after or before the value
|
42
42
|
- `:strict_count` - When `true`, the regex will be generated with a strict character count
|
43
|
+
- `:title` - Regex will be generated for the provided title. If `:title` is provided
|
44
|
+
as a Hash (i.e. to generate a regex with named capturing groups), `:title` must
|
45
|
+
contain the same keys as the target
|
43
46
|
|
44
47
|
## Contributing
|
45
48
|
|
data/bin/generate_regex
CHANGED
@@ -19,10 +19,8 @@ OptionParser.new do |parser|
|
|
19
19
|
exit
|
20
20
|
end
|
21
21
|
|
22
|
-
parser.on('--self_recognition=CHARS', 'Recognizes chargs as itself'
|
23
|
-
|
24
|
-
end
|
25
|
-
|
22
|
+
parser.on('--self_recognition=CHARS', 'Recognizes chargs as itself', &:chars)
|
23
|
+
parser.on('--title=TITLE', 'Generates regex for provided title', &:title)
|
26
24
|
parser.on('--strict_count', 'Generates regex with a strcit chars count')
|
27
25
|
parser.on('--exact_target', 'Generates regex with exact target value')
|
28
26
|
end.parse!(into: params)
|
data/lib/regex_generator.rb
CHANGED
@@ -17,6 +17,7 @@ module RegexGenerator
|
|
17
17
|
# or after the target
|
18
18
|
# @option options [true, false] :strict_count to generate regex with a strict
|
19
19
|
# chars count
|
20
|
+
# @option options [String, Hash] :title to generate regex for provided title
|
20
21
|
# @return [Regexp]
|
21
22
|
#
|
22
23
|
# @example Generate regex
|
@@ -1,31 +1,56 @@
|
|
1
1
|
module RegexGenerator
|
2
2
|
class CharactersRecognizer
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
3
|
+
class << self
|
4
|
+
PATTERNS = [/[A-Z]/, /[a-z]/, /\d/, /\n/, /\s/, /./].freeze
|
5
|
+
|
6
|
+
# Creates array with regex representation for each char from the text
|
7
|
+
#
|
8
|
+
# @param text [String]
|
9
|
+
# @param options [Hash] options to recognize regex patterns with
|
10
|
+
# @option options [Array] :self_recognition to recognize chars as itself
|
11
|
+
# @option options [String, Hash] :title to recognize all chars excluding
|
12
|
+
# title
|
13
|
+
# @return [Array]
|
14
|
+
def recognize(text, options = {})
|
15
|
+
return [] unless text
|
16
|
+
|
17
|
+
result = []
|
18
|
+
|
19
|
+
for_each_char(text, options) do |char, title|
|
20
|
+
next result << title if title
|
21
|
+
|
22
|
+
PATTERNS.each do |pattern|
|
23
|
+
next unless char[pattern]
|
24
|
+
|
25
|
+
escaped_char = Regexp.escape(char)
|
26
|
+
res_pattern = escaped_char
|
27
|
+
res_pattern = pattern.source if (char.eql?(escaped_char) &&
|
28
|
+
!options[:self_recognition]&.include?(escaped_char)) || char[/\s/]
|
29
|
+
|
30
|
+
break result << res_pattern
|
31
|
+
end
|
25
32
|
end
|
33
|
+
|
34
|
+
result
|
26
35
|
end
|
27
36
|
|
28
|
-
|
37
|
+
private
|
38
|
+
|
39
|
+
# Executes the block for each char in the text excluding the titles
|
40
|
+
def for_each_char(text, options)
|
41
|
+
title_regex = if options[:title].kind_of?(Hash)
|
42
|
+
/(#{options[:title].escape.join('|')})/
|
43
|
+
else
|
44
|
+
/(#{options[:title]})/
|
45
|
+
end
|
46
|
+
|
47
|
+
text.split(title_regex).each do |txt|
|
48
|
+
next if txt.empty?
|
49
|
+
next yield(nil, txt) if txt[title_regex].eql?(txt)
|
50
|
+
|
51
|
+
txt.chars.each { |char| yield char, nil }
|
52
|
+
end
|
53
|
+
end
|
29
54
|
end
|
30
55
|
end
|
31
56
|
end
|
@@ -12,18 +12,28 @@ module RegexGenerator
|
|
12
12
|
# or after the target
|
13
13
|
# @option options [true, false] :strict_count to generate regex with a
|
14
14
|
# strict chars count
|
15
|
+
# @option options [String, Hash] :title to generate regex for provided title
|
15
16
|
def initialize(target, text, options = {})
|
16
17
|
@text = text
|
17
18
|
@target = RegexGenerator::Target.new(target)
|
19
|
+
@title = RegexGenerator::Target.new(options[:title])
|
20
|
+
if options[:title] && !@title.keys_equal?(@target)
|
21
|
+
raise RegexGenerator::InvalidOption, :title
|
22
|
+
end
|
23
|
+
|
24
|
+
@title_str = @title.to_s
|
18
25
|
@target_str = @target.to_s
|
19
26
|
@options = options
|
20
27
|
end
|
21
28
|
|
22
29
|
# @return [Regexp]
|
23
30
|
# @raise [TargetNotFoundError] if target text was not found in the text
|
24
|
-
# @raise [InvalidOption] if :look option is not :ahead or :behind
|
31
|
+
# @raise [InvalidOption] if :look option is not :ahead or :behind, or if :title
|
32
|
+
# has different keys than target keys
|
33
|
+
# @raise [TitleNotFoundError] if :title was not found in the text
|
25
34
|
def generate
|
26
35
|
raise RegexGenerator::TargetNotFoundError unless @target.present?(@text)
|
36
|
+
raise RegexGenerator::TitleNotFoundError unless @title.present?(@text)
|
27
37
|
|
28
38
|
string_regex_chars = recognize_text(cut_nearest_text, options)
|
29
39
|
string_patterns_array = slice_to_identicals(string_regex_chars)
|
@@ -51,8 +61,8 @@ module RegexGenerator
|
|
51
61
|
|
52
62
|
def text_regex_for_string
|
53
63
|
{
|
54
|
-
behind: /[\w\W]*((?:\n|\A)[\w\W]
|
55
|
-
ahead: /#{@target.escape}([\w\W]
|
64
|
+
behind: /[\w\W]*((?:\n|\A)[\w\W]*?#{@title_str}\s*)#{@target.escape}/,
|
65
|
+
ahead: /#{@target.escape}([\w\W]*?#{@title_str}(?:\n|\Z))/
|
56
66
|
}[options[:look]]
|
57
67
|
end
|
58
68
|
|
@@ -82,6 +92,8 @@ module RegexGenerator
|
|
82
92
|
|
83
93
|
# Prepares options
|
84
94
|
def options
|
95
|
+
@options[:title] = @title
|
96
|
+
|
85
97
|
if @options[:self_recognition].kind_of? String
|
86
98
|
@options[:self_recognition] = @options[:self_recognition].chars
|
87
99
|
end
|
@@ -45,5 +45,15 @@ module RegexGenerator
|
|
45
45
|
result[key] = Regexp.escape value
|
46
46
|
end
|
47
47
|
end
|
48
|
+
|
49
|
+
# Compares keys (for strings always true)
|
50
|
+
#
|
51
|
+
# @param other_target [Target]
|
52
|
+
# @return [true, false]
|
53
|
+
def keys_equal?(other_target)
|
54
|
+
return false unless to_s.class.eql?(other_target.to_s.class)
|
55
|
+
|
56
|
+
kind_of?(Hash) ? to_s.keys.eql?(other_target.to_s.keys) : true
|
57
|
+
end
|
48
58
|
end
|
49
59
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regex_generator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- o.vykhor
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-04-
|
11
|
+
date: 2019-04-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|