keyphrase 0.2.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6bd137b3873f8a008fb32e9ba2b857fb0fccb94c4f8c0c98dd1a4773a3588ec4
4
- data.tar.gz: 26cd3c7ed4f030b0efb30dd721278974ea84085c7532124fe8f998d9e38513dc
3
+ metadata.gz: 59f4193d2821ad7ebe8b7bdf83e5fe9b396f8c38e6ef2fe1d8e24d224a20ba27
4
+ data.tar.gz: 43ce37834a6316df02476de2efbb17d0c3c6020930697654602336f100eaf3f6
5
5
  SHA512:
6
- metadata.gz: 81d251b04a8cd2885e344e5eda291ae58ed50593d354bce153982a804d36e579509ff2c02a7f536721b282f24b1884b36b55df276006a6517d6192412319a908
7
- data.tar.gz: 83bf1c033007cad120f504d8a9fb5eedeb496171889f42408872950186f20f2ceb8a5dc7f49f0b402dfe91f9b61559cd7488f603508628dd32726b121789f018
6
+ metadata.gz: dedf3654d9d48f58fe151d43810c604936760357a9a9e98a1c3676cff373c838e47ef30b36ca5155a87f4e654a2afb5affbc59313a710f03e5575be73c9266a5
7
+ data.tar.gz: 77b4a3735afd50d97dd1b71b608f3538b292182c2f92971d92eca592a4b0d75d38c728d2975517a18ae80224f3f27f5869251b14c4c56a3aef4a15bb04e24372
@@ -8,8 +8,6 @@ class Keyphrase
8
8
  'tis
9
9
  'twas
10
10
  've
11
- 10
12
- 39
13
11
  a
14
12
  a's
15
13
  able
@@ -4,7 +4,6 @@
4
4
  def self.stopwords
5
5
  @@stopwords ||= [
6
6
  "ਦੇ",
7
- "0",
8
7
  "ਵਿੱਚ",
9
8
  "ਦਾ",
10
9
  "ਅਤੇ",
@@ -18,7 +17,6 @@
18
17
  "ਨੇ",
19
18
  "ਤੇ",
20
19
  "ਨਾਲ",
21
- "1",
22
20
  "ਲਈ",
23
21
  "ਵੀ",
24
22
  "ਸੀ",
@@ -30,7 +28,6 @@
30
28
  "ਹਨ",
31
29
  "ਜਾਂਦਾ",
32
30
  "ਕੀਤਾ",
33
- "2",
34
31
  "ਗਿਆ",
35
32
  "ਹੀ",
36
33
  "ਕੇ",
@@ -47,7 +44,6 @@
47
44
  "ਨਹੀਂ",
48
45
  "ਭਾਰਤੀ",
49
46
  "ਪਿੰਡ",
50
- "3",
51
47
  "ਸਿੰਘ",
52
48
  "ਉੱਤੇ",
53
49
  "ਸਾਲ",
@@ -65,7 +61,6 @@
65
61
  "ਪਰ",
66
62
  "ਦੁਆਰਾ",
67
63
  "ਰੂਪ",
68
- "4",
69
64
  "ਹੋਰ",
70
65
  "ਕੰਮ",
71
66
  "ਆਪਣੀ",
@@ -80,7 +75,6 @@
80
75
  "ਜਾ",
81
76
  "ਵਾਲੇ",
82
77
  "ਸ਼ੁਰੂ",
83
- "5",
84
78
  "ਉਸਨੇ",
85
79
  "ਕਿਹਾ",
86
80
  "ਹੋਣ",
@@ -103,7 +97,6 @@
103
97
  "ਹੁੰਦੇ",
104
98
  "ਸ਼ਹਿਰ",
105
99
  "ਭਾਸ਼ਾ",
106
- "6",
107
100
  "ਹੋਈ",
108
101
  "ਅਨੁਸਾਰ",
109
102
  "ਸਕਦਾ",
@@ -133,7 +126,6 @@
133
126
  "ਨਾਂ",
134
127
  "ਦੌਰਾਨ",
135
128
  "ਤਰ੍ਹਾਂ",
136
- "7",
137
129
  "ਯੂਨੀਵਰਸਿਟੀ",
138
130
  "ਨਾ",
139
131
  "ਏ",
@@ -149,7 +141,6 @@
149
141
  "ਅੰਗਰੇਜ਼ੀ",
150
142
  "ਉਸਨੂੰ",
151
143
  "ਉਹਨਾਂ",
152
- "8",
153
144
  "ਸਥਿਤ",
154
145
  "ਫਿਰ",
155
146
  "ਜੀਵਨ",
@@ -170,13 +161,11 @@
170
161
  "ਉਮਰ",
171
162
  "ਬਲਾਕ",
172
163
  "ਰਹੇ",
173
- "10",
174
164
  "ਸਾਹਿਬ",
175
165
  "ਕਰਦੀ",
176
166
  "ਹਰ",
177
167
  "ਪੈਦਾ",
178
168
  "ਘੱਟ",
179
- "9",
180
169
  "ਲੇਖਕ",
181
170
  "ਹਿੱਸਾ",
182
171
  "ਫ਼ਿਲਮ",
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Keyphrase
4
- VERSION = "0.2.0"
4
+ VERSION = "0.2.2"
5
5
  end
data/lib/keyphrase.rb CHANGED
@@ -7,8 +7,8 @@ class Keyphrase
7
7
  autoload :Stoplist, "keyphrase/stoplist"
8
8
 
9
9
  CLEAN_REGEX = /([^\p{L}a-zA-Z0-9\'\- \.]|(?<!\w)\.)/ # don't remove ' because it might be part of a stop word
10
- BLACKLIST_REGEX = /(?:^|\s)[^a-zA-Z\p{L}]+\b|\'|\-/ # remove words with no letters, ie 123.23.12. And last chance to remove ' and -
11
- CLEAN_SPACES_REGEX = /\s+/
10
+ BLACKLIST_REGEX = /(?:^|\s)[^a-zA-Z\p{L}0-9]+\b|\'|\-/ # remove words with no letters, ie 123.23.12. And last chance to remove ' and -
11
+ CLEAN_SPACES_REGEX = /^[0-9\s\.]+$|\s+/ # last phase. Remove extra whitespace and lone numbers
12
12
  SENTENCES_REGEX = /[+!?,;:&\[\]\{\}\<\>\=\/\n\t\\"\\(\\)\u2019\u2013\|]|-(?!\w)|'(?=s)|(?<!\s)\.(?![a-zA-Z0-9])|(?<!\w)\#(?=\w)/u
13
13
 
14
14
  def self.analyse text, options={}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: keyphrase
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben D'Angelo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-30 00:00:00.000000000 Z
11
+ date: 2024-01-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec