entityextractor 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/extractdates.rb +50 -25
  2. metadata +2 -2
data/lib/extractdates.rb CHANGED
@@ -16,16 +16,8 @@ class ExtractDates
16
16
  c = @text.chunk
17
17
  c.each do |i|
18
18
  s = paragraph(i).segment
19
- dateExtract(s)
20
19
  s.each do |j|
21
- shash = Hash.new
22
- shash[:date] = dateExtract(j)
23
- if shash[:date]
24
- shash[:file] = file
25
- shash[:title] = j
26
- shash[:description] = i
27
- @output.push(shash)
28
- end
20
+ dateExtract(j, file, j, i)
29
21
  end
30
22
  end
31
23
  rescue
@@ -34,25 +26,58 @@ class ExtractDates
34
26
  return @output
35
27
  end
36
28
 
37
- def dateExtract(blob)
38
- # Date formats-
39
- # mm/dd/yy
40
- # mm/dd/yyyy
41
- # Month dd, yyyy
42
- # Month ddth, yyyy
43
- # Month yyyy
44
-
45
- # TOADD:
46
- # Multiple dates
47
- # Year detection
48
- # Conditional American dates
49
- # Time ranges
50
- # Filtering
51
-
29
+ def dateExtract(blob, file, title, description)
30
+ blobstring = blob.to_s
31
+
52
32
  begin
53
- return DateTime.parse(blob).to_s
33
+ if blobstring.match(/(\d{1,2})\/(\d{1,2})\/(\d{2,4})/)
34
+ save = blobstring.match(/(\d{1,2})\/(\d{1,2})\/(\d{2,4})/)
35
+ addItem(DateTime.parse(blob).to_s, file, title, description, blobstring, save.to_s)
36
+ elsif blobstring.match(/(\d{1,2})-(\d{1,2})-(\d{2,4})/)
37
+ save = blobstring.match(/(\d{1,2})-(\d{1,2})-(\d{2,4})/)
38
+ addItem(DateTime.parse(blob).to_s, file, title, description, blobstring, save.to_s)
39
+ elsif blobstring.match(/(.+?)(\w+ \d{1,2}(st|nd|rd|th|), \d{4})/)
40
+ save = blobstring.match(/(.+?)(\w+ \d{1,2}(st|nd|rd|th|), \d{4})/)
41
+ addItem(DateTime.parse(blob).to_s, file, title, description, blobstring, save.to_s)
42
+ elsif blobstring.match(/(.+?) ((?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Nov(?:ember)?|Dec(?:ember)?) \d{2}(st|nd|rd|th|)( |\)|\]))/)
43
+ save = blobstring.match(/(.+?) ((?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Nov(?:ember)?|Dec(?:ember)?) \d{2}(st|nd|rd|th|)( |\)|\]))/)
44
+ addItem(DateTime.parse(blob).to_s, file, title, description, blobstring, save.to_s)
45
+ elsif blobstring.match(/((?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Nov(?:ember)?|Dec(?:ember)?) [1-2][0,9]\d{2}( |\)|\]))/)
46
+ save = blobstring.match(/((?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Nov(?:ember)?|Dec(?:ember)?) [1-2][0,9]\d{2}( |\)|\]))/)
47
+ addItem(DateTime.parse(blob).to_s, file, title, description, blobstring, save.to_s)
48
+ elsif blobstring.match(/(\d{4})-(\d{2})-(\d{2})/)
49
+ save = blobstring.match(/(\d{4})-(\d{2})-(\d{2})/)
50
+ addItem(DateTime.parse(blob).to_s, file, title, description, blobstring, save.to_s)
51
+ elsif blobstring.match(/(\d{4})\/(\d{2})\/(\d{2})/)
52
+ save = blobstring.match(/(\d{4})\/(\d{2})\/(\d{2})/)
53
+ addItem(DateTime.parse(blob).to_s, file, title, description, blobstring, save.to_s)
54
+ end
54
55
  rescue
55
56
  end
56
57
  end
58
+
59
+ # Adds an item to the hash
60
+ def addItem(date, file, title, description, blob, regex)
61
+ shash = Hash.new
62
+ shash[:date] = date
63
+ shash[:file] = file
64
+ shash[:title] = title
65
+ shash[:description] = description
66
+
67
+ flag = 0
68
+ @output.each do |o|
69
+ if (o[:date] == shash[:date]) && (o[:file] == shash[:file]) && (o[:title].to_s == shash[:title].to_s)
70
+ flag = 1
71
+ break
72
+ end
73
+ end
74
+
75
+ if flag == 0
76
+ @output.push(shash)
77
+ end
78
+
79
+ blob.slice! regex
80
+ dateExtract(blob, file, title, description)
81
+ end
57
82
  end
58
83
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: entityextractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-03-16 00:00:00.000000000 Z
12
+ date: 2014-03-18 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Extracts entities and terms from any JSON.
15
15
  email: shidash@shidash.com