entityextractor 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  require 'json'
2
+ load 'extractdates.rb'
2
3
 
3
4
  class EntityExtractor
4
5
  def initialize(input, *extractfield)
@@ -109,6 +110,14 @@ class EntityExtractor
109
110
  i["extract"] = addlist
110
111
  @output.push(i)
111
112
 
113
+ # Extract dates
114
+ elsif type == "date"
115
+ @extractfield.each do |f|
116
+ d = ExtractDates.new(i[f])
117
+ outhash = d.chunk(i["path"])
118
+ @output.push(outhash)
119
+ end
120
+
112
121
  # Extract both set terms and ALLCAPS
113
122
  elsif type == "both"
114
123
  @extractfield.each do |f|
@@ -123,3 +132,4 @@ class EntityExtractor
123
132
  end
124
133
  end
125
134
  end
135
+
@@ -0,0 +1,58 @@
1
+ require 'treat'
2
+ include Treat::Core::DSL
3
+ require 'date'
4
+ require 'json'
5
+ require 'american_date'
6
+
7
# Scans a block of text for sentences that contain a parseable date.
#
# The text is split into paragraphs and then sentences by the Treat helpers
# this file pulls in (`chunk`, `paragraph`, `segment`); each sentence is handed
# to DateTime.parse, and every sentence that yields a date is recorded.
class ExtractDates
  # text - the text to scan; may be nil, in which case #chunk finds nothing.
  def initialize(text)
    @text = text
    @output = []
  end

  # Collects one hash per sentence in which a date could be parsed.
  #
  # file - value stored under :file in every result (the caller's identifier
  #        for where the text came from).
  #
  # Returns the accumulated Array of hashes with the keys :date (String),
  # :file, :title (the sentence) and :description (the enclosing paragraph).
  # Results are appended to the same array on every call, so calling this
  # twice on one instance returns the matches twice.
  def chunk(file)
    return @output unless @text

    begin
      @text.chunk.each do |para|
        paragraph(para).segment.each do |sentence|
          date = dateExtract(sentence)
          next unless date

          @output.push({ date: date, file: file, title: sentence, description: para })
        end
      end
    rescue StandardError
      # Deliberately best-effort: if splitting the text fails part-way,
      # keep whatever was collected before the failure.
    end

    @output
  end

  # Tries to parse a date out of +blob+.
  #
  # Returns the parsed date as a String (DateTime#to_s), or nil when
  # DateTime.parse rejects the input (no recognisable date, nil, or a
  # value it cannot treat as a string).
  #
  # Formats the original author lists as targets (not verified here):
  #   mm/dd/yy, mm/dd/yyyy, Month dd, yyyy, Month ddth, yyyy, Month yyyy
  # NOTE(review): the file also loads `american_date`, which presumably makes
  # the slash forms parse month-first -- confirm.
  #
  # TODO: multiple dates per sentence, year detection, conditional American
  # dates, time ranges, filtering.
  def dateExtract(blob)
    DateTime.parse(blob).to_s
  rescue StandardError
    # No usable date in this input.
    nil
  end
end
58
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: entityextractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-03-01 00:00:00.000000000 Z
12
+ date: 2014-03-16 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Extracts entities and terms from any JSON.
15
15
  email: shidash@shidash.com
@@ -17,6 +17,7 @@ executables: []
17
17
  extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
+ - lib/extractdates.rb
20
21
  - lib/entityextractor.rb
21
22
  homepage: https://github.com/Shidash/EntityExtractor
22
23
  licenses: