commonregex 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 673d3901fb5e5f240782e68a6d251c4e989bf9cf
4
- data.tar.gz: a7969bbc0c7dfce93f869b640f7df5e7047f16d2
3
+ metadata.gz: 02fda7030f78adab40ecec65c9c2ea02a9dec872
4
+ data.tar.gz: 0a909452d950d28317a23cbb1a5ffef194805632
5
5
  SHA512:
6
- metadata.gz: 760d9b167e068162018da4b9b702e3c14a04592bb88d4230d05ec6c2cb44019d394f9c22258f9f0466387413552b5c61954a0ab59e29d662bab4b0587cdfdb39
7
- data.tar.gz: c0ce842f64d8e8a8d0fc851635a90a1f6d370d05d773ac885e1e0499703607ff244f2db9675c21b0c4d40f913d53601e86fcbb1e46c5d7aa0d350305693c25a1
6
+ metadata.gz: 98417b983aea23f07e81581b870b4e411f53404e8041919e3c42e2a61318e781874154747d175ed508ac7092c79544b49911cc27cc783030dbce398088b6440b
7
+ data.tar.gz: 137d12c82a6fc9801b5355b41b5dd7ecf5cb3c7b66522a57e210ceff1ac8c1a62c044b9b262cea0ef4d5634bb54f6476dc0e69a3a32a711a86c60a5cadd5e365
@@ -1,106 +1,68 @@
1
1
  class CommonRegex
2
2
 
3
- # Methods used to generate @date_regex
4
- def self.opt (regex)
5
- '(?:' + regex + ')?'
6
- end
7
-
8
- def self.group(regex)
9
- '(?:' + regex + ')'
10
- end
11
-
12
- def self.any(regexes)
13
- regexes.join('|')
14
- end
15
-
16
- # Generate @date_regex
17
- month_regex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)'
18
- day_regex = '[0-3]?\\d(?:st|nd|rd|th)?'
19
- year_regex = '\\d{4}'
20
-
21
- @@date_regex = Regexp.new('(' + CommonRegex.group(
22
- CommonRegex.any(
23
- [
24
- day_regex + '\\s+(?:of\\s+)?' + month_regex,
25
- month_regex + '\\s+' + day_regex
26
- ]
27
- )
28
- ) + '(?:\\,)?\\s*' + CommonRegex.opt(year_regex) + '|[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4})', Regexp::IGNORECASE || Regexp::MULTILINE)
29
-
30
- @@time_regex = /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/im
31
- @@phone_regex = /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/im
32
- @@links_regex = /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*@\)|[^\s`!()\[\]{};:\'".,<>?]))/im
33
- @@emails_regex = /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+@([a-z0-9]+\.)+([a-z0-9]+))/im
34
- @@ipv4_regex = /\b(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\b/m
35
- @@ipv6_regex = /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]@{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/im
36
- @@hex_colors_regex = /(#(?:[0-9a-fA-F]{3}){1,2})\b/im
37
- @@acronyms_regex = /\b(([A-Z]\.)+|([A-Z]){2,})/m
38
- @@money_regex = /(((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b)/m
39
- @@percentage_regex = /((100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%)/m
40
- @@credit_card_regex = /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/m
41
- @@address_regex = /(\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|traill|drive|court|parkway|boulevard)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd)\.(?=\b)?))/im
42
-
43
-
44
- def initialize(text = '')
45
- @text = text;
46
- end
47
-
48
- def get_dates(text = @text)
49
- get_matches(text, @@date_regex)
50
- end
51
-
52
- def get_times(text = @text)
53
- get_matches(text, @@time_regex)
54
- end
55
-
56
- def get_phones(text = @text)
57
- get_matches(text, @@phone_regex)
58
- end
59
-
60
- def get_links(text = @text)
61
- get_matches(text, @@links_regex)
62
- end
63
-
64
- def get_emails(text = @text)
65
- get_matches(text, @@emails_regex)
66
- end
67
-
68
- def get_ipv4(text = @text)
69
- get_matches(text, @@ipv4_regex)
70
- end
71
-
72
- def get_ipv6(text = @text)
73
- get_matches(text, @@ipv6_regex)
74
- end
75
-
76
- def get_hex_colors(text = @text)
77
- get_matches(text, @@hex_colors_regex)
78
- end
79
-
80
- def get_acronyms(text = @text)
81
- get_matches(text, @@acronyms_regex)
82
- end
83
-
84
- def get_money(text = @text)
85
- get_matches(text, @@money_regex)
86
- end
87
-
88
- def get_percentages(text = @text)
89
- get_matches(text, @@percentage_regex)
90
- end
91
-
92
- def get_credit_cards(text = @text)
93
- get_matches(text, @@credit_card_regex)
94
- end
95
-
96
- def get_addresses(text = @text)
97
- get_matches(text, @@address_regex)
98
- end
99
-
100
- private
101
-
102
- def get_matches(text, regex)
103
- text.scan(regex).collect{|x| x[0]}
104
- end
105
-
106
- end
3
+ # Methods used to generate @date_regex
4
+ def self.opt(regex)
5
+ '(?:' + regex + ')?'
6
+ end
7
+
8
+ def self.group(regex)
9
+ '(?:' + regex + ')'
10
+ end
11
+
12
+ def self.any(regexes)
13
+ regexes.join('|')
14
+ end
15
+
16
+ # Generate @date_regex
17
+ month_regex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)'
18
+ day_regex = '[0-3]?\\d(?:st|nd|rd|th)?'
19
+ year_regex = '\\d{4}'
20
+
21
+ @@dates_regex = Regexp.new('(' + CommonRegex.group(
22
+ CommonRegex.any(
23
+ [
24
+ day_regex + '\\s+(?:of\\s+)?' + month_regex,
25
+ month_regex + '\\s+' + day_regex
26
+ ]
27
+ )
28
+ ) + '(?:\\,)?\\s*' + CommonRegex.opt(year_regex) + '|[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4})', Regexp::IGNORECASE || Regexp::MULTILINE)
29
+
30
+ @@acronyms_regex = /\b(([A-Z]\.)+|([A-Z]){2,})/m
31
+ @@addresses_regex = /(\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|traill|drive|court|parkway|boulevard)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd)\.(?=\b)?))/im
32
+ @@credit_cards_regex = /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/m
33
+ @@emails_regex = /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+@([a-z0-9]+\.)+([a-z0-9]+))/im
34
+ @@hex_colors_regex = /(#(?:[0-9a-fA-F]{3}){1,2})\b/im
35
+ @@ipv4_regex = /\b(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\b/m
36
+ @@ipv6_regex = /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]@{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/im
37
+ @@links_regex = /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*@\)|[^\s`!()\[\]{};:\'".,<>?]))/im
38
+ @@money_regex = /(((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b)/m
39
+ @@percentages_regex = /((100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%)/m
40
+ @@phones_regex = /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/im
41
+ @@times_regex = /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/im
42
+
43
+ %w{acronyms addresses credit_cards dates emails hex_colors ipv4 ipv6 links
44
+ money percentages phones times}.each do |regex|
45
+ class_eval <<-RUBY.gsub(/^\s{6}/, ''), __FILE__, __LINE__
46
+ def self.get_#{regex}(text)
47
+ get_matches(text, @@#{regex}_regex)
48
+ end
49
+
50
+ def get_#{regex}
51
+ self.class.get_#{regex}(@text)
52
+ end
53
+ RUBY
54
+ end
55
+
56
+ def initialize(text = '')
57
+ @text = text;
58
+ end
59
+
60
+ private
61
+
62
+ def self.get_matches(text, regex)
63
+ text.scan(regex).collect{|x| x[0]}
64
+ end
65
+
66
+ end
67
+
68
+ require "commonregex/version"
@@ -0,0 +1,3 @@
1
+ class CommonRegex
2
+ VERSION = '0.1.0'
3
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commonregex
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Talysson Oliveira Cassiano
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-23 00:00:00.000000000 Z
11
+ date: 2015-01-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Find a lot of kinds of common information in a string. CommonRegex port
14
14
  for Ruby.
@@ -18,6 +18,7 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/commonregex.rb
21
+ - lib/commonregex/version.rb
21
22
  homepage: https://github.com/talyssonoc/CommonRegexRuby
22
23
  licenses:
23
24
  - MIT