commonregex 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 673d3901fb5e5f240782e68a6d251c4e989bf9cf
4
- data.tar.gz: a7969bbc0c7dfce93f869b640f7df5e7047f16d2
3
+ metadata.gz: 02fda7030f78adab40ecec65c9c2ea02a9dec872
4
+ data.tar.gz: 0a909452d950d28317a23cbb1a5ffef194805632
5
5
  SHA512:
6
- metadata.gz: 760d9b167e068162018da4b9b702e3c14a04592bb88d4230d05ec6c2cb44019d394f9c22258f9f0466387413552b5c61954a0ab59e29d662bab4b0587cdfdb39
7
- data.tar.gz: c0ce842f64d8e8a8d0fc851635a90a1f6d370d05d773ac885e1e0499703607ff244f2db9675c21b0c4d40f913d53601e86fcbb1e46c5d7aa0d350305693c25a1
6
+ metadata.gz: 98417b983aea23f07e81581b870b4e411f53404e8041919e3c42e2a61318e781874154747d175ed508ac7092c79544b49911cc27cc783030dbce398088b6440b
7
+ data.tar.gz: 137d12c82a6fc9801b5355b41b5dd7ecf5cb3c7b66522a57e210ceff1ac8c1a62c044b9b262cea0ef4d5634bb54f6476dc0e69a3a32a711a86c60a5cadd5e365
@@ -1,106 +1,68 @@
1
1
  class CommonRegex
2
2
 
3
- # Methods used to generate @date_regex
4
- def self.opt (regex)
5
- '(?:' + regex + ')?'
6
- end
7
-
8
- def self.group(regex)
9
- '(?:' + regex + ')'
10
- end
11
-
12
- def self.any(regexes)
13
- regexes.join('|')
14
- end
15
-
16
- # Generate @date_regex
17
- month_regex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)'
18
- day_regex = '[0-3]?\\d(?:st|nd|rd|th)?'
19
- year_regex = '\\d{4}'
20
-
21
- @@date_regex = Regexp.new('(' + CommonRegex.group(
22
- CommonRegex.any(
23
- [
24
- day_regex + '\\s+(?:of\\s+)?' + month_regex,
25
- month_regex + '\\s+' + day_regex
26
- ]
27
- )
28
- ) + '(?:\\,)?\\s*' + CommonRegex.opt(year_regex) + '|[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4})', Regexp::IGNORECASE || Regexp::MULTILINE)
29
-
30
- @@time_regex = /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/im
31
- @@phone_regex = /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/im
32
- @@links_regex = /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*@\)|[^\s`!()\[\]{};:\'".,<>?]))/im
33
- @@emails_regex = /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+@([a-z0-9]+\.)+([a-z0-9]+))/im
34
- @@ipv4_regex = /\b(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\b/m
35
- @@ipv6_regex = /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]@{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/im
36
- @@hex_colors_regex = /(#(?:[0-9a-fA-F]{3}){1,2})\b/im
37
- @@acronyms_regex = /\b(([A-Z]\.)+|([A-Z]){2,})/m
38
- @@money_regex = /(((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b)/m
39
- @@percentage_regex = /((100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%)/m
40
- @@credit_card_regex = /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/m
41
- @@address_regex = /(\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|traill|drive|court|parkway|boulevard)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd)\.(?=\b)?))/im
42
-
43
-
44
- def initialize(text = '')
45
- @text = text;
46
- end
47
-
48
- def get_dates(text = @text)
49
- get_matches(text, @@date_regex)
50
- end
51
-
52
- def get_times(text = @text)
53
- get_matches(text, @@time_regex)
54
- end
55
-
56
- def get_phones(text = @text)
57
- get_matches(text, @@phone_regex)
58
- end
59
-
60
- def get_links(text = @text)
61
- get_matches(text, @@links_regex)
62
- end
63
-
64
- def get_emails(text = @text)
65
- get_matches(text, @@emails_regex)
66
- end
67
-
68
- def get_ipv4(text = @text)
69
- get_matches(text, @@ipv4_regex)
70
- end
71
-
72
- def get_ipv6(text = @text)
73
- get_matches(text, @@ipv6_regex)
74
- end
75
-
76
- def get_hex_colors(text = @text)
77
- get_matches(text, @@hex_colors_regex)
78
- end
79
-
80
- def get_acronyms(text = @text)
81
- get_matches(text, @@acronyms_regex)
82
- end
83
-
84
- def get_money(text = @text)
85
- get_matches(text, @@money_regex)
86
- end
87
-
88
- def get_percentages(text = @text)
89
- get_matches(text, @@percentage_regex)
90
- end
91
-
92
- def get_credit_cards(text = @text)
93
- get_matches(text, @@credit_card_regex)
94
- end
95
-
96
- def get_addresses(text = @text)
97
- get_matches(text, @@address_regex)
98
- end
99
-
100
- private
101
-
102
- def get_matches(text, regex)
103
- text.scan(regex).collect{|x| x[0]}
104
- end
105
-
106
- end
3
+ # Helper methods used to build @@dates_regex
4
+ def self.opt(regex)
5
+ '(?:' + regex + ')?'
6
+ end
7
+
8
+ def self.group(regex)
9
+ '(?:' + regex + ')'
10
+ end
11
+
12
+ def self.any(regexes)
13
+ regexes.join('|')
14
+ end
15
+
16
+ # Generate @@dates_regex
17
+ month_regex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)'
18
+ day_regex = '[0-3]?\\d(?:st|nd|rd|th)?'
19
+ year_regex = '\\d{4}'
20
+
21
+ @@dates_regex = Regexp.new('(' + CommonRegex.group(
22
+ CommonRegex.any(
23
+ [
24
+ day_regex + '\\s+(?:of\\s+)?' + month_regex,
25
+ month_regex + '\\s+' + day_regex
26
+ ]
27
+ )
28
+ ) + '(?:\\,)?\\s*' + CommonRegex.opt(year_regex) + '|[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4})', Regexp::IGNORECASE || Regexp::MULTILINE)
29
+
30
+ @@acronyms_regex = /\b(([A-Z]\.)+|([A-Z]){2,})/m # one or more capital-letter-plus-dot pairs, or two or more consecutive capitals
31
+ @@addresses_regex = /(\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|trail|drive|court|parkway|boulevard)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd)\.(?=\b)?))/im # 1-4 digit number, up to 20 word/space chars, then a street-type word or its dotted abbreviation
32
+ @@credit_cards_regex = /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/m # four 4-digit groups separated by "-" or space, or 16 contiguous digits; not followed by another digit
33
+ @@emails_regex = /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+@([a-z0-9]+\.)+([a-z0-9]+))/im # local part, "@", one or more dot-terminated labels, then a final label
34
+ @@hex_colors_regex = /(#(?:[0-9a-fA-F]{3}){1,2})\b/im # "#" followed by 3 or 6 hex digits
35
+ @@ipv4_regex = /\b(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\b/m # four dot-separated octets, each in the range 0-255
36
+ @@ipv6_regex = /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/im # alternation over full 8-group, "::"-compressed and dotted-IPv4-tail forms; every group is 1-4 hex digits
37
+ @@links_regex = /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))/im # starts with http(s)://, www. or "domain.tld/"; allows balanced parentheses; must end in a balanced paren group or a non-punctuation character
38
+ @@money_regex = /(((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b)/m # optional "U"/"US" prefix, "$", 1-3 digits plus optional 3-digit groups (with or without commas), up to 2 decimals
39
+ @@percentages_regex = /((100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%)/m # 100 (optionally .0…) or a 1-2 digit number with optional decimals, followed by "%"
40
+ @@phones_regex = /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/im # optional leading digit, optional 3-digit area code (parentheses optional), then 3 digits and 4 digits with non-word separators
41
+ @@times_regex = /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/im # 12-hour time with am/pm (minutes optional), or 24-hour H:MM / HH:MM
42
+
43
+ %w{acronyms addresses credit_cards dates emails hex_colors ipv4 ipv6 links
44
+ money percentages phones times}.each do |regex|
45
+ class_eval <<-RUBY.gsub(/^\s{6}/, ''), __FILE__, __LINE__
46
+ def self.get_#{regex}(text)
47
+ get_matches(text, @@#{regex}_regex)
48
+ end
49
+
50
+ def get_#{regex}
51
+ self.class.get_#{regex}(@text)
52
+ end
53
+ RUBY
54
+ end
55
+
56
+ def initialize(text = '')
57
+ @text = text;
58
+ end
59
+
60
+ private
61
+
62
+ def self.get_matches(text, regex)
63
+ text.scan(regex).collect{|x| x[0]}
64
+ end
65
+
66
+ end
67
+
68
+ require "commonregex/version"
@@ -0,0 +1,3 @@
1
+ class CommonRegex
2
+ VERSION = '0.1.0'
3
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commonregex
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Talysson Oliveira Cassiano
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-23 00:00:00.000000000 Z
11
+ date: 2015-01-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Find a lot of kinds of common information in a string. CommonRegex port
14
14
  for Ruby.
@@ -18,6 +18,7 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/commonregex.rb
21
+ - lib/commonregex/version.rb
21
22
  homepage: https://github.com/talyssonoc/CommonRegexRuby
22
23
  licenses:
23
24
  - MIT