regex_generator 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/lib/regex_generator.rb +2 -1
- data/lib/regex_generator/characters_recognizer.rb +1 -0
- data/lib/regex_generator/generator.rb +20 -42
- data/lib/regex_generator/target.rb +49 -0
- data/lib/regex_generator/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 481f895706d869e948bda4786af67e5c12940228c50cd53f33b7d2f84eb4a0f1
|
4
|
+
data.tar.gz: 847974b444d2a2d217fc042144e625c60c297dec1dbfcf60bc692c0b73e985e2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7f8c8af1fb62b699fe599431e83f568fe51ea752feb1749c933c876ff76e4a7c16e8e16aeb3467add14eb9aac6f63c07984f6ad4c849a64b01beeff36467284
|
7
|
+
data.tar.gz: 9b1038527f6f696146eccbba4f9b4abc1a17e0c201e7d35d5d03bca8b971f80660c52509375481efe62666efc491f1dde101ead3f2368ffa2bc03f73cc14eb38
|
data/Gemfile
CHANGED
data/lib/regex_generator.rb
CHANGED
@@ -2,12 +2,13 @@ require 'regex_generator/version'
|
|
2
2
|
require 'byebug'
|
3
3
|
require 'regex_generator/generator'
|
4
4
|
require 'regex_generator/characters_recognizer'
|
5
|
+
require 'regex_generator/target'
|
5
6
|
require 'regex_generator/exceptions'
|
6
7
|
|
7
8
|
module RegexGenerator
|
8
9
|
# Generates regex by text and target text
|
9
10
|
#
|
10
|
-
# @param target [String] what you want to find
|
11
|
+
# @param target [String, Integer, Float, Hash] what you want to find
|
11
12
|
# @param text [String] source text
|
12
13
|
# @param options [Hash] options to generate regex with
|
13
14
|
# @option options [true, false] :exact_target to generate regex
|
@@ -10,14 +10,15 @@ module RegexGenerator
|
|
10
10
|
# itself
|
11
11
|
def initialize(target, text, options = {})
|
12
12
|
@text = text
|
13
|
-
@target = target_to_s(target)
|
13
|
+
@target = RegexGenerator::Target.new(target)
|
14
|
+
@target_str = @target.to_s
|
14
15
|
@options = options
|
15
16
|
end
|
16
17
|
|
17
18
|
# @return [Regexp]
|
18
19
|
# @raise [TargetNotFoundError] if target text was not found in the text
|
19
20
|
def generate
|
20
|
-
raise RegexGenerator::TargetNotFoundError unless target_present?
|
21
|
+
raise RegexGenerator::TargetNotFoundError unless @target.present?(@text)
|
21
22
|
|
22
23
|
string_regex_chars = recognize_text(cut_nearest_text, options)
|
23
24
|
string_patterns_array = slice_to_identicals(string_regex_chars)
|
@@ -31,17 +32,20 @@ module RegexGenerator
|
|
31
32
|
# Cuts nearest to target, text from the start of the string
|
32
33
|
def cut_nearest_text
|
33
34
|
start_pattern = @text[/\n/] ? /\n/ : /^/
|
34
|
-
|
35
|
-
|
36
|
-
|
35
|
+
|
36
|
+
if @target.kind_of? Hash
|
37
|
+
target_regex = /(?:#{@target.escape.join('|')})/
|
38
|
+
text_regex_str = (1..@target_str.count).map do |step|
|
37
39
|
all = step.eql?(1) ? '.' : '[\w\W]'
|
38
40
|
"#{all}+?#{target_regex}"
|
39
41
|
end.join
|
40
42
|
text_regex = Regexp.new "#{start_pattern}#{text_regex_str}"
|
41
|
-
|
42
|
-
|
43
|
-
@text[/[\w\W]*(#{start_pattern}[\w\W]+?)#{Regexp.escape(@target)}/, 1]
|
43
|
+
|
44
|
+
return @text[text_regex]
|
44
45
|
end
|
46
|
+
|
47
|
+
regex = /[\w\W]*(#{start_pattern}[\w\W]+?)#{Regexp.escape(@target_str)}/
|
48
|
+
@text[regex, 1]
|
45
49
|
end
|
46
50
|
|
47
51
|
# Slices array to subarrays with identical neighbor elements
|
@@ -71,45 +75,19 @@ module RegexGenerator
|
|
71
75
|
def options
|
72
76
|
return @options unless @options.any?
|
73
77
|
|
74
|
-
if @options[:self_recognition].is_a? String
|
78
|
+
if @options[:self_recognition].kind_of? String
|
75
79
|
@options[:self_recognition] = @options[:self_recognition].chars
|
76
80
|
end
|
77
81
|
|
78
82
|
@options
|
79
83
|
end
|
80
84
|
|
81
|
-
def target_to_s(target)
|
82
|
-
return target.to_s unless target.is_a? Hash
|
83
|
-
|
84
|
-
target.each_with_object({}) do |(key, value), result|
|
85
|
-
result[key] = value.to_s
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
# Checks if target is present in the text
|
90
|
-
def target_present?
|
91
|
-
return @target.values.all? { |t| @text[t] } if @target.is_a? Hash
|
92
|
-
|
93
|
-
!@text[@target].nil?
|
94
|
-
end
|
95
|
-
|
96
|
-
# If keys false, method returns array with escaped values, otherwise hash
|
97
|
-
# with escaped values (only if target is a hash)
|
98
|
-
def escaped_target(keys: false)
|
99
|
-
return Regexp.escape @target if @target.is_a? String
|
100
|
-
return @target.values.map { |v| Regexp.escape v } unless keys
|
101
|
-
|
102
|
-
@target.each_with_object({}) do |(key, value), result|
|
103
|
-
result[key] = Regexp.escape value
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
85
|
# Recognizes target depending on type (String or Hash)
|
108
86
|
def target_patterns
|
109
|
-
return escaped_target(keys: true) if @options[:exact_target]
|
87
|
+
return @target.escape(keys: true) if @options[:exact_target]
|
110
88
|
|
111
|
-
if @target.is_a? Hash
|
112
|
-
@target.each_with_object({}) do |(key, value), patterns|
|
89
|
+
if @target.kind_of? Hash
|
90
|
+
@target_str.each_with_object({}) do |(key, value), patterns|
|
113
91
|
slices_patterns = slice_to_identicals(recognize(value))
|
114
92
|
patterns[key] = join_patterns(slices_patterns)
|
115
93
|
end
|
@@ -121,21 +99,21 @@ module RegexGenerator
|
|
121
99
|
|
122
100
|
# Recognizes text depending on target type
|
123
101
|
def recognize_text(text, options = {})
|
124
|
-
unless @target.is_a? Hash
|
102
|
+
unless @target.kind_of? Hash
|
125
103
|
return recognize(text, options) << "(#{target_patterns})"
|
126
104
|
end
|
127
105
|
|
128
|
-
target_regex = /#{escaped_target.join('|')}/
|
106
|
+
target_regex = /#{@target.escape.join('|')}/
|
129
107
|
text.split(/(#{target_regex})/).map do |str|
|
130
108
|
next recognize(str, options) unless str[target_regex]
|
131
109
|
|
132
|
-
key = @target.key(str)
|
110
|
+
key = @target_str.key(str)
|
133
111
|
"(?<#{key}>#{target_patterns[key]})"
|
134
112
|
end.flatten
|
135
113
|
end
|
136
114
|
|
137
115
|
def recognize(text, options = {})
|
138
|
-
RegexGenerator::CharactersRecognizer.recognize(text, options)
|
116
|
+
RegexGenerator::CharactersRecognizer.recognize(text.to_s, options)
|
139
117
|
end
|
140
118
|
end
|
141
119
|
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module RegexGenerator
|
2
|
+
class Target
|
3
|
+
# @param target [String, Integer, Float, Hash] target string or hash with
|
4
|
+
# named targets
|
5
|
+
def initialize(target)
|
6
|
+
@target = target
|
7
|
+
end
|
8
|
+
|
9
|
+
# @param text [String] text that should contain the target
|
10
|
+
# @return [true, false]
|
11
|
+
def present?(text)
|
12
|
+
return to_s.values.all? { |t| text[t] } if kind_of? Hash
|
13
|
+
|
14
|
+
!text[to_s].nil?
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts target to string (or values to strings if target is a Hash)
|
18
|
+
#
|
19
|
+
# @return [String, Hash]
|
20
|
+
def to_s
|
21
|
+
return @target.to_s unless @target.kind_of? Hash
|
22
|
+
|
23
|
+
@target.each_with_object({}) do |(key, value), result|
|
24
|
+
result[key] = value.to_s
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Checks type of the target's string representation
|
29
|
+
#
|
30
|
+
# @param type [Class]
|
31
|
+
# @return [true, false]
|
32
|
+
def kind_of?(type)
|
33
|
+
to_s.kind_of? type
|
34
|
+
end
|
35
|
+
|
36
|
+
# Escapes values
|
37
|
+
#
|
38
|
+
# @option keys [true, false] returns Hash with escaped values when true
|
39
|
+
# @return [String, Array, Hash]
|
40
|
+
def escape(keys: false)
|
41
|
+
return Regexp.escape to_s if kind_of? String
|
42
|
+
return to_s.values.map { |v| Regexp.escape v } unless keys
|
43
|
+
|
44
|
+
to_s.each_with_object({}) do |(key, value), result|
|
45
|
+
result[key] = Regexp.escape value
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regex_generator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- o.vykhor
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -72,6 +72,7 @@ files:
|
|
72
72
|
- lib/regex_generator/characters_recognizer.rb
|
73
73
|
- lib/regex_generator/exceptions.rb
|
74
74
|
- lib/regex_generator/generator.rb
|
75
|
+
- lib/regex_generator/target.rb
|
75
76
|
- lib/regex_generator/version.rb
|
76
77
|
- regex_generator.gemspec
|
77
78
|
homepage: https://github.com/oleksiivykhor/regex_generator
|
@@ -93,8 +94,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
94
|
- !ruby/object:Gem::Version
|
94
95
|
version: '0'
|
95
96
|
requirements: []
|
96
|
-
|
97
|
-
rubygems_version: 2.7.8
|
97
|
+
rubygems_version: 3.0.1
|
98
98
|
signing_key:
|
99
99
|
specification_version: 4
|
100
100
|
summary: Simple regex generator
|