regex_generator 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/README.md +3 -2
- data/lib/regex_generator/generator.rb +64 -12
- data/lib/regex_generator/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f17cdfbe4067e8ea3574bf6a2588a078347c52b6dd0797e244d1235f2469368
|
4
|
+
data.tar.gz: be13b4aa4cc21fb7fc679f82eba084f52069a4926134bef4ff7a51b7c08899da
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 68acdc534ecb0e1753e645f312af3bfc6b52a972440907768d0c68dbee622dbb9044f1af7fca8d5062f3436a129d0eac05f9f596ece7da70ba0bc491ee3361da
|
7
|
+
data.tar.gz: e40ece7ba16ea77f36cc319c4c6bf1c7f7110b31fddad77e23eaa6e99292eb96ce3f920810347157d0bbef33c7a7ab953385bf3126061e10b05e407e8bdf848d
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -23,10 +23,11 @@ Or install it yourself as:
|
|
23
23
|
```ruby
|
24
24
|
RegexGenerator.generate('45', 'some text 45')
|
25
25
|
```
|
26
|
+
You can pass target as hash to generate regex with named capturing groups.
|
26
27
|
|
27
28
|
You can use additional options to generate regex. For example:
|
28
29
|
|
29
|
-
```
|
30
|
+
```ruby
|
30
31
|
RegexGenerator.generate('45', 'some text 45', exact_target: true)
|
31
32
|
```
|
32
33
|
|
@@ -40,7 +41,7 @@ Allowed options:
|
|
40
41
|
- Create your feature branch (git checkout -b my-new-feature)
|
41
42
|
- Commit your changes (git commit -am 'Add some feature')
|
42
43
|
- Push to the branch (git push origin my-new-feature)
|
43
|
-
- Create new Pull
|
44
|
+
- Create new Pull Request
|
44
45
|
|
45
46
|
## License
|
46
47
|
|
data/lib/regex_generator/generator.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module RegexGenerator
|
2
2
|
class Generator
|
3
|
-
# @param target [String]
|
3
|
+
# @param target [String, Hash] target string or hash with named targets
|
4
4
|
# @param text [String] source text
|
5
5
|
# @param options [Hash] options to generate regex with
|
6
6
|
# @option options [true, false] :exact_target to generate regex
|
@@ -16,19 +16,13 @@ module RegexGenerator
|
|
16
16
|
# @return [Regexp]
|
17
17
|
# @raise [TargetNotFoundError] if target text was not found in the text
|
18
18
|
def generate
|
19
|
-
raise RegexGenerator::TargetNotFoundError unless
|
19
|
+
raise RegexGenerator::TargetNotFoundError unless target_present?
|
20
20
|
|
21
|
-
string_regex_chars =
|
21
|
+
string_regex_chars = recognize_text(cut_nearest_text, options)
|
22
22
|
string_patterns_array = slice_to_identicals(string_regex_chars)
|
23
23
|
string_regex_str = join_patterns(string_patterns_array)
|
24
|
-
target_regex_str = if @options[:exact_target]
|
25
|
-
Regexp.escape @target
|
26
|
-
else
|
27
|
-
target_patterns_array = slice_to_identicals(recognize(@target, options))
|
28
|
-
join_patterns(target_patterns_array)
|
29
|
-
end
|
30
24
|
|
31
|
-
Regexp.new
|
25
|
+
Regexp.new string_regex_str
|
32
26
|
end
|
33
27
|
|
34
28
|
private
|
@@ -36,7 +30,17 @@ module RegexGenerator
|
|
36
30
|
# Cuts nearest to target, text from the start of the string
|
37
31
|
def cut_nearest_text
|
38
32
|
start_pattern = @text[/\n/] ? /\n/ : /^/
|
39
|
-
@
|
33
|
+
if @target.is_a? Hash
|
34
|
+
target_regex = /(?:#{escaped_target.join('|')})/
|
35
|
+
text_regex_str = (1..@target.count).map do |step|
|
36
|
+
all = step.eql?(1) ? '.' : '[\w\W]'
|
37
|
+
"#{all}+?#{target_regex}"
|
38
|
+
end.join
|
39
|
+
text_regex = Regexp.new "#{start_pattern}#{text_regex_str}"
|
40
|
+
@text[text_regex]
|
41
|
+
else
|
42
|
+
@text[/[\w\W]*(#{start_pattern}[\w\W]+?)#{Regexp.escape(@target)}/, 1]
|
43
|
+
end
|
40
44
|
end
|
41
45
|
|
42
46
|
# Slices array to subarrays with identical neighbor elements
|
@@ -73,7 +77,55 @@ module RegexGenerator
|
|
73
77
|
@options
|
74
78
|
end
|
75
79
|
|
76
|
-
|
80
|
+
# Checks if target is present in the text
|
81
|
+
def target_present?
|
82
|
+
return @target.values.all? { |t| @text[t] } if @target.is_a? Hash
|
83
|
+
|
84
|
+
!@text[@target].nil?
|
85
|
+
end
|
86
|
+
|
87
|
+
# If keys false, method returns array with escaped values, otherwise hash
|
88
|
+
# with escaped values (only if target is a hash)
|
89
|
+
def escaped_target(keys: false)
|
90
|
+
return Regexp.escape @target if @target.is_a? String
|
91
|
+
return @target.values.map { |v| Regexp.escape v } unless keys
|
92
|
+
|
93
|
+
@target.each_with_object({}) do |(key, value), result|
|
94
|
+
result[key] = Regexp.escape value
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
# Recognizes target depending on type (String or Hash)
|
99
|
+
def target_patterns
|
100
|
+
return escaped_target(keys: true) if @options[:exact_target]
|
101
|
+
|
102
|
+
if @target.is_a? Hash
|
103
|
+
@target.each_with_object({}) do |(key, value), patterns|
|
104
|
+
slices_patterns = slice_to_identicals(recognize(value))
|
105
|
+
patterns[key] = join_patterns(slices_patterns)
|
106
|
+
end
|
107
|
+
else
|
108
|
+
target_patterns_array = slice_to_identicals(recognize(@target))
|
109
|
+
join_patterns(target_patterns_array)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
# Recognizes text depending on target type
|
114
|
+
def recognize_text(text, options = {})
|
115
|
+
unless @target.is_a? Hash
|
116
|
+
return recognize(text, options) << "(#{target_patterns})"
|
117
|
+
end
|
118
|
+
|
119
|
+
target_regex = /#{escaped_target.join('|')}/
|
120
|
+
text.split(/(#{target_regex})/).map do |str|
|
121
|
+
next recognize(str, options) unless str[target_regex]
|
122
|
+
|
123
|
+
key = @target.key(str)
|
124
|
+
"(?<#{key}>#{target_patterns[key]})"
|
125
|
+
end.flatten
|
126
|
+
end
|
127
|
+
|
128
|
+
def recognize(text, options = {})
|
77
129
|
RegexGenerator::CharactersRecognizer.recognize(text, options)
|
78
130
|
end
|
79
131
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regex_generator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- o.vykhor
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-01-
|
11
|
+
date: 2019-01-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|