keyphrase 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/keyphrase/version.rb +1 -1
- data/lib/keyphrase.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59f4193d2821ad7ebe8b7bdf83e5fe9b396f8c38e6ef2fe1d8e24d224a20ba27
|
4
|
+
data.tar.gz: 43ce37834a6316df02476de2efbb17d0c3c6020930697654602336f100eaf3f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dedf3654d9d48f58fe151d43810c604936760357a9a9e98a1c3676cff373c838e47ef30b36ca5155a87f4e654a2afb5affbc59313a710f03e5575be73c9266a5
|
7
|
+
data.tar.gz: 77b4a3735afd50d97dd1b71b608f3538b292182c2f92971d92eca592a4b0d75d38c728d2975517a18ae80224f3f27f5869251b14c4c56a3aef4a15bb04e24372
|
data/lib/keyphrase/version.rb
CHANGED
data/lib/keyphrase.rb
CHANGED
@@ -7,8 +7,8 @@ class Keyphrase
|
|
7
7
|
autoload :Stoplist, "keyphrase/stoplist"
|
8
8
|
|
9
9
|
CLEAN_REGEX = /([^\p{L}a-zA-Z0-9\'\- \.]|(?<!\w)\.)/ # don't remove ' because it might be part of a stop word
|
10
|
-
BLACKLIST_REGEX = /(?:^|\s)[^a-zA-Z\p{L}]+\b|\'|\-/ # remove words with no letters, ie 123.23.12. And last chance to remove ' and -
|
11
|
-
CLEAN_SPACES_REGEX =
|
10
|
+
BLACKLIST_REGEX = /(?:^|\s)[^a-zA-Z\p{L}0-9]+\b|\'|\-/ # remove words with no letters, ie 123.23.12. And last chance to remove ' and -
|
11
|
+
CLEAN_SPACES_REGEX = /^[0-9\s\.]+$|\s+/ # last phase. Remove extra whitespace and lone numbers
|
12
12
|
SENTENCES_REGEX = /[+!?,;:&\[\]\{\}\<\>\=\/\n\t\\"\\(\\)\u2019\u2013\|]|-(?!\w)|'(?=s)|(?<!\s)\.(?![a-zA-Z0-9])|(?<!\w)\#(?=\w)/u
|
13
13
|
|
14
14
|
def self.analyse text, options={}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: keyphrase
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben D'Angelo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|