pragmatic_segmenter 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +730 -0
  7. data/Rakefile +4 -0
  8. data/lib/pragmatic_segmenter.rb +2 -0
  9. data/lib/pragmatic_segmenter/abbreviation.rb +22 -0
  10. data/lib/pragmatic_segmenter/abbreviation_replacer.rb +149 -0
  11. data/lib/pragmatic_segmenter/between_punctuation.rb +78 -0
  12. data/lib/pragmatic_segmenter/cleaner.rb +141 -0
  13. data/lib/pragmatic_segmenter/ellipsis.rb +36 -0
  14. data/lib/pragmatic_segmenter/exclamation_words.rb +19 -0
  15. data/lib/pragmatic_segmenter/languages/amharic.rb +33 -0
  16. data/lib/pragmatic_segmenter/languages/arabic.rb +83 -0
  17. data/lib/pragmatic_segmenter/languages/armenian.rb +33 -0
  18. data/lib/pragmatic_segmenter/languages/burmese.rb +33 -0
  19. data/lib/pragmatic_segmenter/languages/deutsch.rb +132 -0
  20. data/lib/pragmatic_segmenter/languages/english.rb +44 -0
  21. data/lib/pragmatic_segmenter/languages/french.rb +29 -0
  22. data/lib/pragmatic_segmenter/languages/greek.rb +29 -0
  23. data/lib/pragmatic_segmenter/languages/hindi.rb +33 -0
  24. data/lib/pragmatic_segmenter/languages/italian.rb +39 -0
  25. data/lib/pragmatic_segmenter/languages/japanese.rb +58 -0
  26. data/lib/pragmatic_segmenter/languages/persian.rb +56 -0
  27. data/lib/pragmatic_segmenter/languages/russian.rb +60 -0
  28. data/lib/pragmatic_segmenter/languages/spanish.rb +39 -0
  29. data/lib/pragmatic_segmenter/languages/urdu.rb +33 -0
  30. data/lib/pragmatic_segmenter/list.rb +169 -0
  31. data/lib/pragmatic_segmenter/number.rb +35 -0
  32. data/lib/pragmatic_segmenter/process.rb +126 -0
  33. data/lib/pragmatic_segmenter/punctuation.rb +12 -0
  34. data/lib/pragmatic_segmenter/punctuation_replacer.rb +62 -0
  35. data/lib/pragmatic_segmenter/rules.rb +38 -0
  36. data/lib/pragmatic_segmenter/segmenter.rb +81 -0
  37. data/lib/pragmatic_segmenter/sentence_boundary_punctuation.rb +17 -0
  38. data/lib/pragmatic_segmenter/single_letter_abbreviation.rb +37 -0
  39. data/lib/pragmatic_segmenter/types.rb +12 -0
  40. data/lib/pragmatic_segmenter/version.rb +3 -0
  41. data/pragmatic_segmenter.gemspec +25 -0
  42. data/spec/performance_spec.rb +24 -0
  43. data/spec/pragmatic_segmenter_spec.rb +1906 -0
  44. data/spec/spec_helper.rb +1 -0
  45. metadata +150 -0
@@ -0,0 +1 @@
1
+ require 'pragmatic_segmenter'
metadata ADDED
@@ -0,0 +1,150 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pragmatic_segmenter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Kevin S. Dias
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: 'Pragmatic Segmenter is a sentence segmentation tool for Ruby. It allows
70
+ you to split a text into an array of sentences. This gem provides 2 main benefits
71
+ over other segmentation gems - 1) It works well even with ill-formatted text 2)
72
+ It works for multiple languages '
73
+ email:
74
+ - diasks2@gmail.com
75
+ executables: []
76
+ extensions: []
77
+ extra_rdoc_files: []
78
+ files:
79
+ - ".gitignore"
80
+ - ".rspec"
81
+ - Gemfile
82
+ - LICENSE.txt
83
+ - README.md
84
+ - Rakefile
85
+ - lib/pragmatic_segmenter.rb
86
+ - lib/pragmatic_segmenter/abbreviation.rb
87
+ - lib/pragmatic_segmenter/abbreviation_replacer.rb
88
+ - lib/pragmatic_segmenter/between_punctuation.rb
89
+ - lib/pragmatic_segmenter/cleaner.rb
90
+ - lib/pragmatic_segmenter/ellipsis.rb
91
+ - lib/pragmatic_segmenter/exclamation_words.rb
92
+ - lib/pragmatic_segmenter/languages/amharic.rb
93
+ - lib/pragmatic_segmenter/languages/arabic.rb
94
+ - lib/pragmatic_segmenter/languages/armenian.rb
95
+ - lib/pragmatic_segmenter/languages/burmese.rb
96
+ - lib/pragmatic_segmenter/languages/deutsch.rb
97
+ - lib/pragmatic_segmenter/languages/english.rb
98
+ - lib/pragmatic_segmenter/languages/french.rb
99
+ - lib/pragmatic_segmenter/languages/greek.rb
100
+ - lib/pragmatic_segmenter/languages/hindi.rb
101
+ - lib/pragmatic_segmenter/languages/italian.rb
102
+ - lib/pragmatic_segmenter/languages/japanese.rb
103
+ - lib/pragmatic_segmenter/languages/persian.rb
104
+ - lib/pragmatic_segmenter/languages/russian.rb
105
+ - lib/pragmatic_segmenter/languages/spanish.rb
106
+ - lib/pragmatic_segmenter/languages/urdu.rb
107
+ - lib/pragmatic_segmenter/list.rb
108
+ - lib/pragmatic_segmenter/number.rb
109
+ - lib/pragmatic_segmenter/process.rb
110
+ - lib/pragmatic_segmenter/punctuation.rb
111
+ - lib/pragmatic_segmenter/punctuation_replacer.rb
112
+ - lib/pragmatic_segmenter/rules.rb
113
+ - lib/pragmatic_segmenter/segmenter.rb
114
+ - lib/pragmatic_segmenter/sentence_boundary_punctuation.rb
115
+ - lib/pragmatic_segmenter/single_letter_abbreviation.rb
116
+ - lib/pragmatic_segmenter/types.rb
117
+ - lib/pragmatic_segmenter/version.rb
118
+ - pragmatic_segmenter.gemspec
119
+ - spec/performance_spec.rb
120
+ - spec/pragmatic_segmenter_spec.rb
121
+ - spec/spec_helper.rb
122
+ homepage: https://github.com/diasks2/pragmatic_segmenter
123
+ licenses:
124
+ - MIT
125
+ metadata: {}
126
+ post_install_message:
127
+ rdoc_options: []
128
+ require_paths:
129
+ - lib
130
+ required_ruby_version: !ruby/object:Gem::Requirement
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ version: '0'
135
+ required_rubygems_version: !ruby/object:Gem::Requirement
136
+ requirements:
137
+ - - ">="
138
+ - !ruby/object:Gem::Version
139
+ version: '0'
140
+ requirements: []
141
+ rubyforge_project:
142
+ rubygems_version: 2.4.1
143
+ signing_key:
144
+ specification_version: 4
145
+ summary: A rule-based sentence boundary detection gem that works out-of-the-box across
146
+ many languages
147
+ test_files:
148
+ - spec/performance_spec.rb
149
+ - spec/pragmatic_segmenter_spec.rb
150
+ - spec/spec_helper.rb