groupie 0.2.2 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: dd7e47aef7d4ed19c206e46d5eb716991562ee2c257c5cf8488341f646801f2f
4
+ data.tar.gz: 692108e3c8c2b7d4b26a3c7d702133d0a5e01324d0c42cd9c4143b3a4b601b6c
5
+ SHA512:
6
+ metadata.gz: dba999961a8c6d7ba2999770d9125dc1c4b2c6468452c115a0eeb3256d8d484ef0cba8d975053e5e4e7d907afaf402063fb0b75c6d1c24b5bd3dc7698a251f35
7
+ data.tar.gz: cb5d99d029d237a37354b81b97a4d0a03b6224040ddbbbd0cd96cb3536ea0cfe4ecfb55871e35749b133df0178c97937f2b98523dc3504e4620b3a2473403478
@@ -0,0 +1,32 @@
1
+ ---
2
+ name: Bug report
3
+ about: Create a report to help us improve
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ ## Describe the bug
11
+
12
+ A clear and concise description of what the bug is.
13
+
14
+ ## How to reproduce it
15
+
16
+ Include the minimum code sample required to reproduce the bug.
17
+
18
+ ## Expected behavior
19
+
20
+ A clear and concise description of what you expected to happen.
21
+
22
+ ## Context
23
+
24
+ Please describe:
25
+
26
+ * What version of Groupie did you use? (`bundle list|grep groupie`)
27
+ * What version of Ruby did you use? (`ruby -v`)
28
+ * What platform and architecture do you use? (macOS version (Intel vs Apple M1), Windows version, Linux distro/architecture)
29
+
30
+ ## Additional context
31
+
32
+ Add any other context about the problem here.
@@ -0,0 +1,24 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ ## Is your feature request related to a problem? Please describe.
11
+
12
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
13
+
14
+ ## Describe the solution you'd like
15
+
16
+ A clear and concise description of what you want to happen.
17
+
18
+ ## Describe alternatives you've considered
19
+
20
+ A clear and concise description of any alternative solutions or features you've considered.
21
+
22
+ ## Additional context
23
+
24
+ Add any other context or screenshots about the feature request here.
@@ -0,0 +1,13 @@
1
+ # https://docs.github.com/en/github/administering-a-repository/keeping-your-dependencies-updated-automatically
2
+ # There's a lot of options, but for now let's keep it simple.
3
+ # Check every week (Monday 5:00 CET by default) for updates.
4
+ # Each package manager gets 5 non-security updates by default.
5
+ # Security updates bypass most configuration here and show up when found.
6
+ # We rely on Depfu for Ruby and gem updates
7
+ version: 2
8
+ updates:
9
+ - package-ecosystem: github-actions
10
+ # GitHub Actions are checked for updates
11
+ directory: '/'
12
+ schedule:
13
+ interval: weekly
@@ -0,0 +1,16 @@
1
+ name: Gem building
2
+
3
+ on: [ pull_request ]
4
+
5
+ jobs:
6
+ build:
7
+ runs-on: ubuntu-latest
8
+ steps:
9
+ - uses: actions/checkout@v2
10
+ - name: Set up Ruby
11
+ uses: ruby/setup-ruby@v1
12
+ with:
13
+ ruby-version: 3.0
14
+ bundler-cache: true
15
+ - name: Build the gem
16
+ run: bundle exec rake build
@@ -0,0 +1,22 @@
1
+ name: RSpec
2
+
3
+ on: [ push, pull_request ]
4
+
5
+ jobs:
6
+ build:
7
+ runs-on: ubuntu-latest
8
+ strategy:
9
+ matrix:
10
+ # Maintained versions: 2.7, 3.0, 3.1
11
+ # Security updates only: 2.6 (EOL: 2022-03-31)
12
+ # Source: https://www.ruby-lang.org/en/downloads/branches/
13
+ ruby: [ 2.6, 2.7, 3.0, 3.1 ]
14
+ steps:
15
+ - uses: actions/checkout@v2
16
+ - name: Set up Ruby
17
+ uses: ruby/setup-ruby@v1
18
+ with:
19
+ ruby-version: ${{ matrix.ruby }}
20
+ bundler-cache: true
21
+ - name: Run the tests
22
+ run: bundle exec rspec
@@ -0,0 +1,26 @@
1
+ name: Code Quality
2
+ on: [ pull_request ]
3
+ jobs:
4
+ rubocop:
5
+ name: Rubocop
6
+ runs-on: ubuntu-latest
7
+ steps:
8
+ - name: Check out code
9
+ uses: actions/checkout@v2
10
+ - name: Install Ruby & Gems
11
+ uses: ruby/setup-ruby@v1 # Ruby version is set explicitly via ruby-version below
12
+ with:
13
+ ruby-version: 3.0
14
+ bundler-cache: true
15
+ - name: Rubocop
16
+ # https://github.com/reviewdog/action-rubocop
17
+ uses: reviewdog/action-rubocop@v2
18
+ with:
19
+ fail_on_error: true
20
+ filter_mode: nofilter
21
+ github_token: ${{ secrets.github_token }}
22
+ reporter: github-pr-review
23
+ rubocop_version: gemfile
24
+ # Rely on Bundler-installed gems so don't install them again
25
+ use_bundler: true
26
+ skip_install: true
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,53 @@
1
+ require:
2
+ - rubocop-rspec
3
+ - rubocop-rake
4
+ - rubocop-performance
5
+
6
+ AllCops:
7
+ # We bump the version to get new cops, so enable them by default
8
+ NewCops: enable
9
+
10
+ # Two lines should fit next to each other in split view on a widescreen
11
+ Layout/LineLength:
12
+ Max: 120
13
+
14
+ # We still have old-style rspec checks, so this triggers on functional comparisons there
15
+ Lint/Void:
16
+ Exclude:
17
+ - 'spec/**/*_spec.rb'
18
+
19
+ # RSpec has a lot of blocks, so ignore this rule there
20
+ Metrics/BlockLength:
21
+ Exclude:
22
+ - 'spec/**/*_spec.rb'
23
+
24
+ # I prefer to see the class name over "described_class"
25
+ RSpec/DescribedClass:
26
+ EnforcedStyle: explicit
27
+
28
+ # I prefer groups for structure, so the defaults are a little too strict for me
29
+ RSpec/NestedGroups:
30
+ Max: 4
31
+
32
+ # I prefer more verbose examples, so tend to use more lines than the defaults
33
+ RSpec/ExampleLength:
34
+ Max: 20
35
+
36
+ # For strings I enjoy using %w[], but for symbols the %i[] syntax just does not click.
37
+ Style/SymbolArray:
38
+ EnforcedStyle: brackets
39
+
40
+ # Indentation is something I've got strong opinions about which differ from Rubocop.
41
+ Layout/ArgumentAlignment:
42
+ EnforcedStyle: with_fixed_indentation # default is with_first_argument
43
+ Layout/ArrayAlignment:
44
+ EnforcedStyle: with_fixed_indentation # default is with_first_element
45
+ Layout/FirstArgumentIndentation:
46
+ EnforcedStyle: consistent # default is special_for_inner_method_call_in_parentheses
47
+ Layout/FirstArrayElementIndentation:
48
+ EnforcedStyle: consistent # default is special_inside_parentheses
49
+ Layout/FirstHashElementIndentation:
50
+ EnforcedStyle: consistent # default is special_inside_parentheses
51
+ # Let's enforce this to be consistent
52
+ Layout/EndOfLine:
53
+ EnforcedStyle: lf # \n (unix line end) enforced everywhere, default is native
data/CHANGELOG.md ADDED
@@ -0,0 +1,107 @@
1
+ ## Unreleased changes
2
+
3
+ ## Version 0.5.0 -- 2022-02-16
4
+
5
+ This release has breaking changes (deprecation cleanup and internals rework), a new feature (smart weights!) and is officially tested on Ruby 3.1.0 (it's what I use). I've enabled the setting to require MFA to publish this gem, to help protect those who use it.
6
+
7
+ - Breaking: remove `String#tokenize` core extension; please use `Groupie.tokenize(string)` instead [#39](https://github.com/Narnach/groupie/pull/39)
8
+ - Breaking: due to changed internals, YAML serialized data from 0.4.x will lack some of the new internal caches. I'd suggest loading the old data and adding the words from each group to a new (0.5.x) instance of Groupie. [#40](https://github.com/Narnach/groupie/pull/40)
9
+ - Feat: add support for smart default weights, reducing the effect of low data on predictions [#40](https://github.com/Narnach/groupie/pull/40)
10
+ - Deps: add Ruby 3.1 to list of tested & supported Ruby versions
11
+ - Chore: require multi-factor authentication to publish gem updates
12
+ - Chore: add SECURITY.md to advertise a security policy
13
+ - Style: addressed Lint/AmbiguousOperatorPrecedence
14
+ - Dev: bump development dependencies multiple times
15
+ - Dev: switch to Depfu to manage development dependencies
16
+
17
+ ## Version 0.4.1 -- 2021-09-08
18
+
19
+ Non-functional fixes to the CI config and Rubygems.org metadata.
20
+
21
+ - Fix: correct changelog uri for gem
22
+ - CI: fix dependabot config
23
+
24
+ ## Version 0.4.0 -- 2021-09-07
25
+
26
+ Welcome to 2021, where Ruby version 2.6 is the lowest with official support, Bundler is the default for managing packages and RSpec version 3 is used to test things. This version updates Groupie into this decade.
27
+
28
+ - Refactor: update Groupie to 2021 standards
29
+ - Feat: raise Groupie::Error instead of RuntimeError
30
+ - Feat: deprecate String#tokenize in favor of Groupie.tokenize
31
+ - Doc: document API of Groupie
32
+ - Doc: update readme with examples
33
+ - Refactor: reorder Groupie methods by importance
34
+ - Refactor: simplify Groupie#classify
35
+ - Refactor: reduce complexity of Groupie#unique_words
36
+ - Refactor: simplify Groupie#classify\_text
37
+
38
+ ## Version 0.3.0 -- 2010-07-29
39
+
40
+ Multiple changes to the 'unique words' strategy, hopefully improving the behavior.
41
+
42
+ - Cache unique words in an instance var to reduce time required to do subsequent lookups
43
+ - Sanity spec
44
+ - Unique strategy now includes all words except for the global 4th quartile
45
+ - Unique strategy changed yet again: only ignore words that occur more than their group's median
46
+ - Unique strategy now behaves like sqrt that only checks unique words
47
+ - Unique word finder uses less elegant but (hopefully) faster code
48
+ - Removed gemspec
49
+
50
+ ## Version 0.2.3 -- 2010-07-29
51
+
52
+ Add a new 'unique words' strategy, which ignores words that occur in all categories.
53
+
54
+ - Added 'unique' classification strategy
55
+ - Added Group#<< as alias for Group#add
56
+ - Updated readme
57
+
58
+ ## Version 0.2.2 -- 2010-07-25
59
+
60
+ Bugfix for log10 strategy.
61
+
62
+ - Fixed log10 strategy counting for Groupie.classify
63
+
64
+ ## Version 0.2.1 -- 2010-07-25
65
+
66
+ Offer multiple ways to weigh word counts in calculating final scores.
67
+
68
+ - Added sqrt and log word counting strategies
69
+
70
+ ## Version 0.2.0 -- 2010-07-25
71
+
72
+ Classification can't raise division by zero errors anymore.
73
+
74
+ - Groupie.classify_text ignores unclassified tokens
75
+
76
+ ## Version 0.1.1 -- 2010-07-25
77
+
78
+ Swap test framework and tokenization improvements.
79
+
80
+ - Regenerated gemspec
81
+ - Strip quotes from tokens
82
+ - Replaced testy tests with rspec
83
+
84
+ ## Version 0.1.0 -- 2010-07-25
85
+
86
+ The initial release as a gem, after working on this on/off over a year.
87
+
88
+ - Added gemspec
89
+ - Fixed text classification to properly average group scores
90
+ - Added test for classifying tokenized html email spam
91
+ - Classification of texts is now possible
92
+ - Added readme and MIT license
93
+ - Test the full html and headers of tokenized emails
94
+ - Support infix commas for tokenized strings
95
+ - Allow infix dots in tokenized strings
96
+ - Strip HTML tags when sanitizing a string
97
+ - Classify common words based on tokenized text from spam.la e-mails
98
+ - Added String#tokenize
99
+ - Ensure a Group will still work when loaded from YAML
100
+ - Added test helper file
101
+ - Refactored Group to maintain a Hash of words and counts instead of a list of words
102
+ - Removed obsolete method
103
+ - Added testcase for three groups
104
+ - Support multiple examples to add more weight to their grouping
105
+ - Renamed tests to reflect intent of content
106
+ - Classification now allows for a degree of certainty
107
+ - Implemented simple spam check
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in groupie.gemspec
6
+ gemspec
7
+
8
+ gem 'psych', '~> 4.0'
9
+ gem 'rake', '~> 13.0'
10
+ gem 'rspec', '~> 3.0'
11
+ gem 'rubocop', '~> 1.7'
12
+ gem 'rubocop-performance', '~> 1.11'
13
+ gem 'rubocop-rake', '~> 0.6.0'
14
+ gem 'rubocop-rspec', '~> 2.4'
data/Gemfile.lock ADDED
@@ -0,0 +1,71 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ groupie (0.5.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.2)
10
+ diff-lcs (1.5.0)
11
+ parallel (1.21.0)
12
+ parser (3.1.0.0)
13
+ ast (~> 2.4.1)
14
+ psych (4.0.3)
15
+ stringio
16
+ rainbow (3.1.1)
17
+ rake (13.0.6)
18
+ regexp_parser (2.2.1)
19
+ rexml (3.2.5)
20
+ rspec (3.11.0)
21
+ rspec-core (~> 3.11.0)
22
+ rspec-expectations (~> 3.11.0)
23
+ rspec-mocks (~> 3.11.0)
24
+ rspec-core (3.11.0)
25
+ rspec-support (~> 3.11.0)
26
+ rspec-expectations (3.11.0)
27
+ diff-lcs (>= 1.2.0, < 2.0)
28
+ rspec-support (~> 3.11.0)
29
+ rspec-mocks (3.11.0)
30
+ diff-lcs (>= 1.2.0, < 2.0)
31
+ rspec-support (~> 3.11.0)
32
+ rspec-support (3.11.0)
33
+ rubocop (1.25.1)
34
+ parallel (~> 1.10)
35
+ parser (>= 3.1.0.0)
36
+ rainbow (>= 2.2.2, < 4.0)
37
+ regexp_parser (>= 1.8, < 3.0)
38
+ rexml
39
+ rubocop-ast (>= 1.15.1, < 2.0)
40
+ ruby-progressbar (~> 1.7)
41
+ unicode-display_width (>= 1.4.0, < 3.0)
42
+ rubocop-ast (1.15.2)
43
+ parser (>= 3.0.1.1)
44
+ rubocop-performance (1.13.2)
45
+ rubocop (>= 1.7.0, < 2.0)
46
+ rubocop-ast (>= 0.4.0)
47
+ rubocop-rake (0.6.0)
48
+ rubocop (~> 1.0)
49
+ rubocop-rspec (2.8.0)
50
+ rubocop (~> 1.19)
51
+ ruby-progressbar (1.11.0)
52
+ stringio (3.0.1)
53
+ unicode-display_width (2.1.0)
54
+
55
+ PLATFORMS
56
+ x86_64-darwin-20
57
+ x86_64-darwin-21
58
+ x86_64-linux
59
+
60
+ DEPENDENCIES
61
+ groupie!
62
+ psych (~> 4.0)
63
+ rake (~> 13.0)
64
+ rspec (~> 3.0)
65
+ rubocop (~> 1.7)
66
+ rubocop-performance (~> 1.11)
67
+ rubocop-rake (~> 0.6.0)
68
+ rubocop-rspec (~> 2.4)
69
+
70
+ BUNDLED WITH
71
+ 2.3.4
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2009-2021 Wes Oldenbeuving
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,140 @@
1
+ # Groupie
2
+
3
+ [![Depfu](https://badges.depfu.com/badges/367956233b3b31a6fc19db4515263b9e/overview.svg)](https://depfu.com/github/Narnach/groupie?project_id=34004)
4
+
5
+ Groupie is a simple way to group texts and classify new texts as being a likely member of one of the defined groups. Think of Bayesian spam filters.
6
+
7
+ The eventual goal is to have Groupie work as a sort of Bayesian spam filter, where you feed it spam and ham (non-spam) and ask it to classify new texts as spam or ham. Applications for this are e-mail spam filtering and blog spam filtering. Other sorts of categorizing might be interesting as well, such as finding suitable tags for a blog post or bookmark.
8
+
9
+ Started and forgotten in 2009 as a short-lived experiment, in 2010 Groupie got new features when I started using it on an RSS reader project that classified news items into "Interesting" and "Not interesting" categories.
10
+
11
+ ## Current functionality
12
+
13
+ Current functionality includes:
14
+
15
+ * Tokenize an input text to prepare it for grouping.
16
+ * Strip XML and HTML tags.
17
+ * Keep certain infix characters, such as period and comma.
18
+ * Add texts (as an Array of Strings) to any number of groups.
19
+ * Classify a single word to check the likelihood it belongs to each group.
20
+ * Do classification for complete (tokenized) texts.
21
+ * Pick a classification strategy to weigh repeat words differently (weigh by sum, square root or log10 of words in group).
22
+
23
+ ## Installation
24
+
25
+ Add this line to your application's Gemfile:
26
+
27
+ ```ruby
28
+ gem 'groupie'
29
+ ```
30
+
31
+ You can also run this command to add it for you:
32
+
33
+ bundle add groupie
34
+
35
+ And then execute:
36
+
37
+ bundle install
38
+
39
+ Or install it system-wide via:
40
+
41
+ gem install groupie
42
+
43
+ ## Usage
44
+
45
+ Here is an annotated console session that shows off the features available in Groupie.
46
+
47
+ ```ruby
48
+ # Instantiate a new Groupie instance
49
+ groupie = Groupie.new
50
+
51
+ # Groups are defined as you use them, so let's get started by adding some pre-tokenized words
52
+ groupie[:spam].add(%w[this is obvious spam please buy our product])
53
+ groupie[:spam].add(%w[hello friend this is rich prince i have awesome bitcoin for you])
54
+ groupie[:ham].add(%w[you are invited to my awesome party just click the link to rsvp])
55
+
56
+ # Is your data less than clean? We've got a tokenizer for that!
57
+ tokens = Groupie.tokenize('Please give me your password so I can haxx0r you!')
58
+ # => ["please", "give", "me", "your", "password", "so", "i", "can", "haxx0r", "you"]
59
+ groupie[:spam].add(tokens)
60
+
61
+ # So, now let's attempt to classify a text and see if it's spam or ham:
62
+ test_tokens = %w[please click the link to reset your password for our awesome product]
63
+ groupie.classify_text(test_tokens)
64
+ # => {:spam=>0.5909090909090909, :ham=>0.4090909090909091}
65
+ # As you can see, this password reset email looks a little dodgy...
66
+ # We have multiple strategies for drawing conclusions about what group it belongs to.
67
+ # The default you saw above is :sum, it weighs each word by the total sum of occurrences.
68
+ # Let's see if it looks less bad by using a different classification strategy.
69
+
70
+ # Log reduces the weight of each word to the log10 of its occurrence count:
71
+ # - Count 1 is weight 0
72
+ # - Count 10 is weight 1
73
+ # - Count 100 is weight 2
74
+ groupie.classify_text(test_tokens, :log)
75
+ # => {:spam=>0.5, :ham=>0.5}
76
+ # This is even more even, most likely because it ignores all single-count words...
77
+
78
+ # Square root algorithm is less harsh, it reduces the weight of each word to the square root of the count:
79
+ # - Count 1 is weight 1
80
+ # - Count 4 is weight 2
81
+ # - Count 9 is weight 3
82
+ groupie.classify_text(test_tokens, :sqrt)
83
+ # => {:spam=>0.5909090909090909, :ham=>0.4090909090909091}
84
+ # This seems to result in the same value as :sum
85
+
86
+ # Unique uses the same weighting algorithm as the square root, but it modifies the word dictionary:
87
+ # it discards the 25% most common words, so less common words gain higher predictive power.
88
+ groupie.classify_text(test_tokens, :unique)
89
+ # => {:spam=>0.625, :ham=>0.375}
90
+ # This looks even worse for our poor password reset email.
91
+ # In case you're curious, the ignored words in this case are:
92
+ test_tokens - (test_tokens & groupie.unique_words)
93
+ # => ["please", "to", "reset", "awesome"]
94
+ # If you'd be classifying email, you can assume that common email headers will get ignored this way.
95
+
96
+ # If you're just starting out, your incomplete data could lead to dramatic misrepresentations of the data.
97
+ # To balance against this, you can enable smart weight:
98
+ groupie.smart_weight = true
99
+ # You could also set it during initialization via Groupie.new(smart_weight: true)
100
+ # What's so useful about it? It adds a default weight to _all_ words, even the ones you haven't
101
+ # seen yet, which counter-acts the data you have. This shines in low data situations,
102
+ # reducing the impact of the few words you have seen before.
103
+ groupie.default_weight
104
+ # => 1.2285714285714286
105
+ # Classifying the same text as before should consider all words, and add this default weight to all words
106
+ # It basically gives all groups the likelihood of "claiming" a word,
107
+ # unless there is strong data to suggest otherwise.
108
+ groupie.classify_text(test_tokens)
109
+ # => {:spam=>0.5241046831955923, :ham=>0.4758953168044077}
110
+ ```
111
+
112
+ Persistence can be naively done by using YAML:
113
+
114
+ ```ruby
115
+ # Instantiate a new Groupie instance
116
+ groupie = Groupie.new
117
+ groupie[:spam].add(%w[assume you have a lot of data you care about])
118
+
119
+ require 'yaml'
120
+ yaml = YAML.dump(groupie)
121
+ loaded = YAML.safe_load(yaml, permitted_classes: [Groupie, Groupie::Group, Symbol])
122
+ ```
123
+
124
+ I'm still experimenting with Groupie in [Infinity Feed](https://www.infinity-feed.com), so persistence is a Future Problem for me there. In development, I'm building (low data count) classifiers in memory and discarding them after use.
125
+
126
+ ## Development
127
+
128
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. Rubocop is available via `bin/rubocop` with some friendly default settings.
129
+
130
+ To install this gem onto your local machine, run `bundle exec rake install`.
131
+
132
+ To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org). For obvious reasons, only the project maintainer can do this.
133
+
134
+ ## Contributing
135
+
136
+ Bug reports and pull requests are welcome on GitHub at https://github.com/Narnach/groupie.
137
+
138
+ ## License
139
+
140
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile CHANGED
@@ -1,53 +1,12 @@
1
- require 'rubygems'
2
- require 'rake'
1
+ # frozen_string_literal: true
3
2
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "groupie"
8
- gem.summary = %Q{Group and classify text}
9
- gem.description = %Q{Group and classify text based on likelyhood of being included in a text of a specific category}
10
- gem.email = "narnach@gmail.com"
11
- gem.homepage = "http://github.com/Narnach/groupie"
12
- gem.authors = ["Wes Oldenbeuving"]
13
- gem.add_development_dependency "testy", ">= 0"
14
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
- end
16
- Jeweler::GemcutterTasks.new
17
- rescue LoadError
18
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
19
- end
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
20
5
 
21
- require 'rake/testtask'
22
- Rake::TestTask.new(:spec) do |test|
23
- test.libs << 'lib' << 'spec'
24
- test.pattern = 'spec/**/*_spec.rb'
25
- test.verbose = true
26
- end
6
+ RSpec::Core::RakeTask.new(:spec)
27
7
 
28
- begin
29
- require 'rcov/rcovtask'
30
- Rcov::RcovTask.new do |test|
31
- test.libs << 'spec'
32
- test.pattern = 'spec/**/*_spec.rb'
33
- test.verbose = true
34
- end
35
- rescue LoadError
36
- task :rcov do
37
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
38
- end
39
- end
8
+ require 'rubocop/rake_task'
40
9
 
41
- task :test => :check_dependencies
10
+ RuboCop::RakeTask.new
42
11
 
43
- task :default => :test
44
-
45
- require 'rake/rdoctask'
46
- Rake::RDocTask.new do |rdoc|
47
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
-
49
- rdoc.rdoc_dir = 'rdoc'
50
- rdoc.title = "groupie #{version}"
51
- rdoc.rdoc_files.include('readme*')
52
- rdoc.rdoc_files.include('lib/**/*.rb')
53
- end
12
+ task default: [:spec, :rubocop]
data/SECURITY.md ADDED
@@ -0,0 +1,18 @@
1
+ # Security Policy
2
+
3
+ Groupie is inherently not a user-facing library, so possible vectors for exploitation seem small to me.
4
+ That said, in the event of a security vulnerability being found, this document describes how to report it.
5
+
6
+ ## Supported Versions
7
+
8
+ As a small library with infrequent updates, I will accept bug and security reports for the current minor version.
9
+ Severe issues might be backported to previous minor versions. I'll handle this on a case-by-case basis.
10
+
11
+ ## Reporting a Vulnerability
12
+
13
+ For low-risk things you can create an issue or PR.
14
+ In case of a high-risk issue, you can email me at security@narnach.com.
15
+
16
+ ## Thanks
17
+
18
+ Once we have successfully handled a security vulnerability, we'll add an attribution to the list below.
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'groupie'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require 'irb'
15
+ IRB.start(__FILE__)
data/bin/rubocop ADDED
@@ -0,0 +1,2 @@
1
+ #!/bin/bash
2
+ bundle exec rubocop --force-exclusion $*