excite 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/.gitignore +11 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +8 -0
  4. data/Gemfile.lock +69 -0
  5. data/LICENSE +22 -0
  6. data/README.md +46 -0
  7. data/Rakefile +24 -0
  8. data/config/citation_cleanup_rules.yml +68 -0
  9. data/config/parscit_features.yml +55 -0
  10. data/excite.gemspec +30 -0
  11. data/lib/excite/array_helpers.rb +27 -0
  12. data/lib/excite/citation.rb +48 -0
  13. data/lib/excite/crfparser.rb +322 -0
  14. data/lib/excite/postprocessor.rb +252 -0
  15. data/lib/excite/preprocessor.rb +107 -0
  16. data/lib/excite/resources/dicts/female-names +4954 -0
  17. data/lib/excite/resources/dicts/first-names +27926 -0
  18. data/lib/excite/resources/dicts/male-names +3901 -0
  19. data/lib/excite/resources/dicts/months +24 -0
  20. data/lib/excite/resources/dicts/places +43109 -0
  21. data/lib/excite/resources/dicts/publishers +654 -0
  22. data/lib/excite/resources/dicts/surnames +146259 -0
  23. data/lib/excite/resources/html.template +84 -0
  24. data/lib/excite/resources/html_model +0 -0
  25. data/lib/excite/resources/model +0 -0
  26. data/lib/excite/resources/parsCit.template +76 -0
  27. data/lib/excite/resources/trainingdata/tagged_html_references.txt +500 -0
  28. data/lib/excite/resources/trainingdata/tagged_references.txt +500 -0
  29. data/lib/excite/resources/trainingdata/verify.rb +97 -0
  30. data/lib/excite/token_features.rb +313 -0
  31. data/lib/excite/version.rb +7 -0
  32. data/lib/excite.rb +13 -0
  33. data/model/test/analysis.csv +54 -0
  34. data/model/test/array_helpers.rb +30 -0
  35. data/model/test/html-analysis.csv +60 -0
  36. data/model/test/html-output.txt +19893 -0
  37. data/model/test/model_test.rb +306 -0
  38. data/model/test/output.txt +16742 -0
  39. data/spec/excite/citation_spec.rb +128 -0
  40. data/spec/excite/crfparser_spec.rb +118 -0
  41. data/spec/excite/postprocessor_spec.rb +68 -0
  42. data/spec/excite/token_features_spec.rb +641 -0
  43. data/spec/spec_helper.rb +4 -0
  44. metadata +222 -0
metadata ADDED
@@ -0,0 +1,222 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: excite
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.1.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - David Judd
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-03-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: openurl
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: activesupport
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: crfpp
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: nokogiri
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rake
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: rspec
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: pry
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: pry-debugger
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ description:
143
+ email:
144
+ - david@academia.edu
145
+ executables: []
146
+ extensions: []
147
+ extra_rdoc_files: []
148
+ files:
149
+ - .gitignore
150
+ - .rspec
151
+ - Gemfile
152
+ - Gemfile.lock
153
+ - LICENSE
154
+ - README.md
155
+ - Rakefile
156
+ - config/citation_cleanup_rules.yml
157
+ - config/parscit_features.yml
158
+ - excite.gemspec
159
+ - lib/excite.rb
160
+ - lib/excite/array_helpers.rb
161
+ - lib/excite/citation.rb
162
+ - lib/excite/crfparser.rb
163
+ - lib/excite/postprocessor.rb
164
+ - lib/excite/preprocessor.rb
165
+ - lib/excite/resources/dicts/female-names
166
+ - lib/excite/resources/dicts/first-names
167
+ - lib/excite/resources/dicts/male-names
168
+ - lib/excite/resources/dicts/months
169
+ - lib/excite/resources/dicts/places
170
+ - lib/excite/resources/dicts/publishers
171
+ - lib/excite/resources/dicts/surnames
172
+ - lib/excite/resources/html.template
173
+ - lib/excite/resources/html_model
174
+ - lib/excite/resources/model
175
+ - lib/excite/resources/parsCit.template
176
+ - lib/excite/resources/trainingdata/tagged_html_references.txt
177
+ - lib/excite/resources/trainingdata/tagged_references.txt
178
+ - lib/excite/resources/trainingdata/verify.rb
179
+ - lib/excite/token_features.rb
180
+ - lib/excite/version.rb
181
+ - model/test/analysis.csv
182
+ - model/test/array_helpers.rb
183
+ - model/test/html-analysis.csv
184
+ - model/test/html-output.txt
185
+ - model/test/model_test.rb
186
+ - model/test/output.txt
187
+ - spec/excite/citation_spec.rb
188
+ - spec/excite/crfparser_spec.rb
189
+ - spec/excite/postprocessor_spec.rb
190
+ - spec/excite/token_features_spec.rb
191
+ - spec/spec_helper.rb
192
+ homepage: http://github.com/academia-edu/free_cite
193
+ licenses: []
194
+ post_install_message:
195
+ rdoc_options: []
196
+ require_paths:
197
+ - lib
198
+ required_ruby_version: !ruby/object:Gem::Requirement
199
+ none: false
200
+ requirements:
201
+ - - ! '>='
202
+ - !ruby/object:Gem::Version
203
+ version: '0'
204
+ required_rubygems_version: !ruby/object:Gem::Requirement
205
+ none: false
206
+ requirements:
207
+ - - ! '>='
208
+ - !ruby/object:Gem::Version
209
+ version: '0'
210
+ requirements: []
211
+ rubyforge_project:
212
+ rubygems_version: 1.8.23
213
+ signing_key:
214
+ specification_version: 3
215
+ summary: Parse citations
216
+ test_files:
217
+ - spec/excite/citation_spec.rb
218
+ - spec/excite/crfparser_spec.rb
219
+ - spec/excite/postprocessor_spec.rb
220
+ - spec/excite/token_features_spec.rb
221
+ - spec/spec_helper.rb
222
+ has_rdoc: