keyphrase 0.2.1 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4cd803bd3889474b636e13965338038d3c4de18eb364ec406f31043a414d4127
4
- data.tar.gz: 751052a233be7ce1f035946ae67f54c00dab5a66093b02be8cad005f64e3d0f1
3
+ metadata.gz: 4b8f0d99c0a272fe649bb03145cef348efcdcaa9c7cf2e0d9bf9666d400eed63
4
+ data.tar.gz: 312e73550b15fc1ebc5bf49feff75c00d86d0d18c1e568ed29cc5fa0b3962a03
5
5
  SHA512:
6
- metadata.gz: 2dd724916edeac584010c5ab2945d83d48f24ab753ee15b70cf9cd4c282aeac6f0ad8ecaa5bc6fef9df1776eb4218e63e31115a17fdf1c18fa3f08bfa17ddef6
7
- data.tar.gz: dda819e7eea186e0f8480a13f2c089b0e550c2f4c31071b53d419e13f9fe4471329df36f6b6f312f32bfa8ae463657497897c88d32d534b4dd42b5b9aa3c3a30
6
+ metadata.gz: bebe924c5f4b03d7dc8545d351cb0ea9d6c1fa400d850d86c35cab9c7cbbdd4c2250b9b4ecb8b83b21b7406a9e34ea66c6c51112a40cd069dbdbfceca46aff46
7
+ data.tar.gz: a98850a22cf9c8a41b8a39df0a51c09a6e45150885058c695a97109c151686526f3bd299d517b5f4eb45d373c042e2d2a026cb8b0d83e555aafc30842e381092
data/lib/keyphrase/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Keyphrase
4
- VERSION = "0.2.1"
4
+ VERSION = "0.2.3"
5
5
  end
data/lib/keyphrase.rb CHANGED
@@ -7,9 +7,9 @@ class Keyphrase
7
7
  autoload :Stoplist, "keyphrase/stoplist"
8
8
 
9
9
  CLEAN_REGEX = /([^\p{L}a-zA-Z0-9\'\- \.]|(?<!\w)\.)/ # don't remove ' because it might be part of a stop word
10
- BLACKLIST_REGEX = /(?:^|\s)[^a-zA-Z\p{L}]+\b|\'|\-/ # remove words with no letters, ie 123.23.12. And last chance to remove ' and -
11
- CLEAN_SPACES_REGEX = /\s+/
12
- SENTENCES_REGEX = /[+!?,;:&\[\]\{\}\<\>\=\/\n\t\\"\\(\\)\u2019\u2013\|]|-(?!\w)|'(?=s)|(?<!\s)\.(?![a-zA-Z0-9])|(?<!\w)\#(?=\w)/u
10
+ BLACKLIST_REGEX = /(?:^|\s)[^a-zA-Z\p{L}0-9]+\b|\'|\-/ # remove words with no letters, ie 123.23.12. And last chance to remove ' and -
11
+ CLEAN_SPACES_REGEX = /^[0-9\s\.]+$|\s+/ # last phase. Remove extra whitespace and lone numbers
12
+ SENTENCES_REGEX = /[+!?,;:&\[\]\{\}\<\>\=\/\n\t\\"\\(\\)\u2019\u2013\|]|-(?!\w)|'(?=s)|(?<!\s)\.(?![a-zA-Z0-9])|(?<!\w)\#(?=\w)|\p{Extended_Pictographic}+/u
13
13
 
14
14
  def self.analyse text, options={}
15
15
  @@keyphrase ||= Keyphrase.new
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: keyphrase
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben D'Angelo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-30 00:00:00.000000000 Z
11
+ date: 2024-01-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec