groupie 0.1.1 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/dependabot.yml +21 -0
- data/.github/workflows/gem.yml +16 -0
- data/.github/workflows/rspec.yml +21 -0
- data/.github/workflows/rubocop.yml +26 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.rubocop.yml +38 -0
- data/CHANGELOG.md +93 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +67 -0
- data/LICENSE.txt +21 -0
- data/README.md +121 -0
- data/Rakefile +7 -48
- data/bin/console +15 -0
- data/bin/rubocop +2 -0
- data/bin/setup +9 -0
- data/groupie.gemspec +30 -57
- data/lib/groupie/core_ext/string.rb +6 -6
- data/lib/groupie/group.rb +11 -3
- data/lib/groupie/version.rb +10 -0
- data/lib/groupie.rb +110 -28
- metadata +52 -84
- data/.document +0 -5
- data/LICENSE +0 -20
- data/VERSION +0 -1
- data/readme.rdoc +0 -27
- data/spec/fixtures/ham/email_ham1.txt +0 -13
- data/spec/fixtures/ham/spam.la-44116217.txt +0 -79
- data/spec/fixtures/spam/email_spam1.txt +0 -5
- data/spec/fixtures/spam/email_spam2.txt +0 -7
- data/spec/fixtures/spam/spam.la-44118014.txt +0 -73
- data/spec/groupie/core_ext/string_spec.rb +0 -37
- data/spec/groupie/group_spec.rb +0 -12
- data/spec/groupie_spec.rb +0 -75
- data/spec/spec_helper.rb +0 -1
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 18730d0a05fad9b05bb1dc50302436fb7c15043fbfe665dc96b6ea927a50aa73
|
4
|
+
data.tar.gz: 8040f137e1f57080009b55cd08f0406f0ebb2e4112c7bae0838b08e4e70e0f82
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: edc0af06023ed354667a3c36857d9a004785a78506a085c97591435eb67081ddd58a42165b8c00db029e2ec97c039f60682c33a82be5488f3a35264a52cd6e77
|
7
|
+
data.tar.gz: 0c002dfbcc1e8b4df108042be00d9588eb5ff8e1b9697de99e1bc87292f07500dc1d3f33307e853f1b2dc7c8bb2e5097e4649f1a716331afed424f3a9d4c0253
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# https://docs.github.com/en/github/administering-a-repository/keeping-your-dependencies-updated-automatically
|
2
|
+
# There are a lot of options, but for now let's keep it simple.
|
3
|
+
# Check every week (Monday 5:00 CET by default) for updates.
|
4
|
+
# Each package manager gets 5 non-security updates by default.
|
5
|
+
# Security updates bypass most configuration here and show up when found.
|
6
|
+
version: 2
|
7
|
+
updates:
|
8
|
+
- package-ecosystem: github-actions
|
9
|
+
# Github Actions are checked for updates
|
10
|
+
directory: '/'
|
11
|
+
schedule:
|
12
|
+
interval: weekly
|
13
|
+
- package-ecosystem: bundler
|
14
|
+
# Bundler handles Ruby dependencies
|
15
|
+
directory: '/'
|
16
|
+
schedule:
|
17
|
+
interval: weekly
|
18
|
+
versioning-strategy: increase-if-necessary
|
19
|
+
allow:
|
20
|
+
- dependency-type: direct
|
21
|
+
- dependency-type: indirect
|
@@ -0,0 +1,16 @@
|
|
1
|
+
name: Gem building
|
2
|
+
|
3
|
+
on: [ pull_request ]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
steps:
|
9
|
+
- uses: actions/checkout@v2
|
10
|
+
- name: Set up Ruby
|
11
|
+
uses: ruby/setup-ruby@v1
|
12
|
+
with:
|
13
|
+
ruby-version: 3.0
|
14
|
+
bundler-cache: true
|
15
|
+
- name: Build the gem
|
16
|
+
run: bundle exec rake build
|
@@ -0,0 +1,21 @@
|
|
1
|
+
name: RSpec
|
2
|
+
|
3
|
+
on: [ push, pull_request ]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
strategy:
|
9
|
+
matrix:
|
10
|
+
# Maintained versions: 2.7 and 3.0
|
11
|
+
# Security updates only: 2.6
|
12
|
+
ruby: [ 2.6, 2.7, 3.0 ]
|
13
|
+
steps:
|
14
|
+
- uses: actions/checkout@v2
|
15
|
+
- name: Set up Ruby
|
16
|
+
uses: ruby/setup-ruby@v1
|
17
|
+
with:
|
18
|
+
ruby-version: ${{ matrix.ruby }}
|
19
|
+
bundler-cache: true
|
20
|
+
- name: Run the tests
|
21
|
+
run: bundle exec rspec
|
@@ -0,0 +1,26 @@
|
|
1
|
+
name: Code Quality
|
2
|
+
on: [ pull_request ]
|
3
|
+
jobs:
|
4
|
+
rubocop:
|
5
|
+
name: Rubocop
|
6
|
+
runs-on: ubuntu-latest
|
7
|
+
steps:
|
8
|
+
- name: Check out code
|
9
|
+
uses: actions/checkout@v2
|
10
|
+
- name: Install Ruby & Gems
|
11
|
+
uses: ruby/setup-ruby@v1 # Uses .ruby-version as version input
|
12
|
+
with:
|
13
|
+
ruby-version: 3.0
|
14
|
+
bundler-cache: true
|
15
|
+
- name: Rubocop
|
16
|
+
# https://github.com/reviewdog/action-rubocop
|
17
|
+
uses: reviewdog/action-rubocop@v2
|
18
|
+
with:
|
19
|
+
fail_on_error: true
|
20
|
+
filter_mode: nofilter
|
21
|
+
github_token: ${{ secrets.github_token }}
|
22
|
+
reporter: github-pr-review
|
23
|
+
rubocop_version: gemfile
|
24
|
+
# Rely on Bundler-installed gems so don't install them again
|
25
|
+
use_bundler: true
|
26
|
+
skip_install: true
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require:
|
2
|
+
- rubocop-rspec
|
3
|
+
- rubocop-rake
|
4
|
+
- rubocop-performance
|
5
|
+
|
6
|
+
AllCops:
|
7
|
+
# We bump the version to get new cops, so enable them by default
|
8
|
+
NewCops: enable
|
9
|
+
|
10
|
+
# Two lines should fit next to each other in split view on a widescreen
|
11
|
+
Layout/LineLength:
|
12
|
+
Max: 120
|
13
|
+
|
14
|
+
# We still have old-style rspec checks, so this triggers on functional comparisons there
|
15
|
+
Lint/Void:
|
16
|
+
Exclude:
|
17
|
+
- 'spec/**/*_spec.rb'
|
18
|
+
|
19
|
+
# RSpec has a lot of blocks, so ignore this rule there
|
20
|
+
Metrics/BlockLength:
|
21
|
+
Exclude:
|
22
|
+
- 'spec/**/*_spec.rb'
|
23
|
+
|
24
|
+
# I prefer to see the class name over "described_class"
|
25
|
+
RSpec/DescribedClass:
|
26
|
+
EnforcedStyle: explicit
|
27
|
+
|
28
|
+
# I prefer groups for structure, so the defaults are a little too strict for me
|
29
|
+
RSpec/NestedGroups:
|
30
|
+
Max: 4
|
31
|
+
|
32
|
+
# I prefer more verbose examples, so tend to use more lines than the defaults
|
33
|
+
RSpec/ExampleLength:
|
34
|
+
Max: 20
|
35
|
+
|
36
|
+
# For strings I enjoy using %w[], but for symbols the %i[] syntax just does not click.
|
37
|
+
Style/SymbolArray:
|
38
|
+
EnforcedStyle: brackets
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
## Unreleased changes
|
2
|
+
|
3
|
+
## Version 0.4.1 -- 2021-09-08
|
4
|
+
|
5
|
+
Non-functional fixes to the CI config and Rubygems.org metadata.
|
6
|
+
|
7
|
+
- Fix: correct changelog uri for gem
|
8
|
+
- CI: fix dependabot config
|
9
|
+
|
10
|
+
## Version 0.4.0 -- 2021-09-07
|
11
|
+
|
12
|
+
Welcome to 2021, where Ruby version 2.6 is the lowest with official support, Bundler is the default for managing packages and RSpec version 3 is used to test things. This version updates Groupie into this decade.
|
13
|
+
|
14
|
+
- Refactor: update Groupie to 2021 standards
|
15
|
+
- Feat: raise Groupie::Error instead of RuntimeError
|
16
|
+
- Feat: deprecate String#tokenize in favor of Groupie.tokenize
|
17
|
+
- Doc: document API of Groupie
|
18
|
+
- Doc: update readme with examples
|
19
|
+
- Refactor: reorder Groupie methods by importance
|
20
|
+
- Refactor: simplify Groupie#classify
|
21
|
+
- Refactor: reduce complexity of Groupie#unique_words
|
22
|
+
- Refactor: simplify Groupie#classify\_text
|
23
|
+
|
24
|
+
## Version 0.3.0 -- 2010-07-29
|
25
|
+
|
26
|
+
Multiple changes to the 'unique words' strategy, hopefully improving the behavior.
|
27
|
+
|
28
|
+
- Cache unique words in an instance var to reduce time required to do subsequent lookups
|
29
|
+
- Sanity spec
|
30
|
+
- Unique strategy now includes all words except for the global 4th quartile
|
31
|
+
- Unique strategy changed yet again: only ignore words that occur more than their group's median
|
32
|
+
- Unique strategy now behaves like sqrt that only checks unique words
|
33
|
+
- Unique word finder uses less elegant but (hopefully) faster code
|
34
|
+
- Removed gemspec
|
35
|
+
|
36
|
+
## Version 0.2.3 -- 2010-07-29
|
37
|
+
|
38
|
+
Add a new 'unique words' strategy, which ignores words that occur in all categories.
|
39
|
+
|
40
|
+
- Added 'unique' classification strategy
|
41
|
+
- Added Group#<< as alias for Group#add
|
42
|
+
- Updated readme
|
43
|
+
|
44
|
+
## Version 0.2.2 -- 2010-07-25
|
45
|
+
|
46
|
+
Bugfix for log10 strategy.
|
47
|
+
|
48
|
+
- Fixed log10 strategy counting for Groupie.classify
|
49
|
+
|
50
|
+
## Version 0.2.1 -- 2010-07-25
|
51
|
+
|
52
|
+
Offer multiple ways to weigh word counts in calculating final scores.
|
53
|
+
|
54
|
+
- Added sqrt and log word counting strategies
|
55
|
+
|
56
|
+
## Version 0.2.0 -- 2010-07-25
|
57
|
+
|
58
|
+
Classification can't raise division by zero errors anymore.
|
59
|
+
|
60
|
+
- Groupie.classify_text ignores unclassified tokens
|
61
|
+
|
62
|
+
## Version 0.1.1 -- 2010-07-25
|
63
|
+
|
64
|
+
Swap test framework and tokenization improvements.
|
65
|
+
|
66
|
+
- Regenerated gemspec
|
67
|
+
- Strip quotes from tokens
|
68
|
+
- Replaced testy tests with rspec
|
69
|
+
|
70
|
+
## Version 0.1.0 -- 2010-07-25
|
71
|
+
|
72
|
+
The initial release as a gem, after working on this on/off over a year.
|
73
|
+
|
74
|
+
- Added gemspec
|
75
|
+
- Fixed text classification to properly average group scores
|
76
|
+
- Added test for classifying tokenized html email spam
|
77
|
+
- Classification of texts is now possible
|
78
|
+
- Added readme and MIT license
|
79
|
+
- Test the full html and headers of tokenized emails
|
80
|
+
- Support infix commas for tokenized strings
|
81
|
+
- Allow infix dots in tokenized strings
|
82
|
+
- Strip HTML tags when sanitizing a string
|
83
|
+
- Classify common words based on tokenized text from spam.la e-mails
|
84
|
+
- Added String#tokenize
|
85
|
+
- Ensure a Group will still work when loaded from YAML
|
86
|
+
- Added test helper file
|
87
|
+
- Refactored Group to maintain a Hash of words and counts instead of a list of words
|
88
|
+
- Removed obsolete method
|
89
|
+
- Added testcase for three groups
|
90
|
+
- Support multiple examples to add more weight to their grouping
|
91
|
+
- Renamed tests to reflect intent of content
|
92
|
+
- Classification now allows for a degree of certainty
|
93
|
+
- Implemented simple spam check
|
data/Gemfile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source 'https://rubygems.org'
|
4
|
+
|
5
|
+
# Specify your gem's dependencies in groupie.gemspec
|
6
|
+
gemspec
|
7
|
+
|
8
|
+
gem 'rake', '~> 13.0'
|
9
|
+
gem 'rspec', '~> 3.0'
|
10
|
+
gem 'rubocop', '~> 1.7'
|
11
|
+
gem 'rubocop-performance', '~> 1.11'
|
12
|
+
gem 'rubocop-rake', '~> 0.6.0'
|
13
|
+
gem 'rubocop-rspec', '~> 2.4'
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
groupie (0.4.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
ast (2.4.2)
|
10
|
+
diff-lcs (1.4.4)
|
11
|
+
parallel (1.20.1)
|
12
|
+
parser (3.0.2.0)
|
13
|
+
ast (~> 2.4.1)
|
14
|
+
rainbow (3.0.0)
|
15
|
+
rake (13.0.6)
|
16
|
+
regexp_parser (2.1.1)
|
17
|
+
rexml (3.2.5)
|
18
|
+
rspec (3.10.0)
|
19
|
+
rspec-core (~> 3.10.0)
|
20
|
+
rspec-expectations (~> 3.10.0)
|
21
|
+
rspec-mocks (~> 3.10.0)
|
22
|
+
rspec-core (3.10.1)
|
23
|
+
rspec-support (~> 3.10.0)
|
24
|
+
rspec-expectations (3.10.1)
|
25
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
+
rspec-support (~> 3.10.0)
|
27
|
+
rspec-mocks (3.10.2)
|
28
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
29
|
+
rspec-support (~> 3.10.0)
|
30
|
+
rspec-support (3.10.2)
|
31
|
+
rubocop (1.20.0)
|
32
|
+
parallel (~> 1.10)
|
33
|
+
parser (>= 3.0.0.0)
|
34
|
+
rainbow (>= 2.2.2, < 4.0)
|
35
|
+
regexp_parser (>= 1.8, < 3.0)
|
36
|
+
rexml
|
37
|
+
rubocop-ast (>= 1.9.1, < 2.0)
|
38
|
+
ruby-progressbar (~> 1.7)
|
39
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
40
|
+
rubocop-ast (1.11.0)
|
41
|
+
parser (>= 3.0.1.1)
|
42
|
+
rubocop-performance (1.11.5)
|
43
|
+
rubocop (>= 1.7.0, < 2.0)
|
44
|
+
rubocop-ast (>= 0.4.0)
|
45
|
+
rubocop-rake (0.6.0)
|
46
|
+
rubocop (~> 1.0)
|
47
|
+
rubocop-rspec (2.4.0)
|
48
|
+
rubocop (~> 1.0)
|
49
|
+
rubocop-ast (>= 1.1.0)
|
50
|
+
ruby-progressbar (1.11.0)
|
51
|
+
unicode-display_width (2.0.0)
|
52
|
+
|
53
|
+
PLATFORMS
|
54
|
+
x86_64-darwin-20
|
55
|
+
x86_64-linux
|
56
|
+
|
57
|
+
DEPENDENCIES
|
58
|
+
groupie!
|
59
|
+
rake (~> 13.0)
|
60
|
+
rspec (~> 3.0)
|
61
|
+
rubocop (~> 1.7)
|
62
|
+
rubocop-performance (~> 1.11)
|
63
|
+
rubocop-rake (~> 0.6.0)
|
64
|
+
rubocop-rspec (~> 2.4)
|
65
|
+
|
66
|
+
BUNDLED WITH
|
67
|
+
2.2.24
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2009-2021 Wes Oldenbeuving
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
# Groupie
|
2
|
+
|
3
|
+
Groupie is a simple way to group texts and classify new texts as being a likely member of one of the defined groups. Think of Bayesian spam filters.
|
4
|
+
|
5
|
+
The eventual goal is to have Groupie work as a sort of Bayesian spam filter, where you feed it spam and ham (non-spam) and ask it to classify new texts as spam or ham. Applications for this are e-mail spam filtering and blog spam filtering. Other sorts of categorizing might be interesting as well, such as finding suitable tags for a blog post or bookmark.
|
6
|
+
|
7
|
+
Started and forgotten in 2009 as a short-lived experiment, in 2010 Groupie got new features when I started using it on an RSS reader project that classified news items into "Interesting" and "Not interesting" categories.
|
8
|
+
|
9
|
+
## Current functionality
|
10
|
+
|
11
|
+
Current functionality includes:
|
12
|
+
|
13
|
+
* Tokenize an input text to prepare it for grouping.
|
14
|
+
* Strip XML and HTML tags.
|
15
|
+
* Keep certain infix characters, such as period and comma.
|
16
|
+
* Add texts (as an Array of Strings) to any number of groups.
|
17
|
+
* Classify a single word to check the likelihood it belongs to each group.
|
18
|
+
* Do classification for complete (tokenized) texts.
|
19
|
+
* Pick classification strategy to weigh repeat words differently (weigh by sum, square root or log10 of words in group)
|
20
|
+
|
21
|
+
## Installation
|
22
|
+
|
23
|
+
Add this line to your application's Gemfile:
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
gem 'groupie'
|
27
|
+
```
|
28
|
+
|
29
|
+
You can also run this command to add it for you:
|
30
|
+
|
31
|
+
bundle add groupie
|
32
|
+
|
33
|
+
And then execute:
|
34
|
+
|
35
|
+
bundle install
|
36
|
+
|
37
|
+
Or install it system-wide via:
|
38
|
+
|
39
|
+
gem install groupie
|
40
|
+
|
41
|
+
## Usage
|
42
|
+
|
43
|
+
Here is an annotated console session that shows off the features available in Groupie.
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
# Instantiate a new Groupie instance
|
47
|
+
groupie = Groupie.new
|
48
|
+
|
49
|
+
# Groups are defined as you use them, so let's get started by adding some pre-tokenized words
|
50
|
+
groupie[:spam].add(%w[this is obvious spam please buy our product])
|
51
|
+
groupie[:spam].add(%w[hello friend this is rich prince i have awesome bitcoin for you])
|
52
|
+
groupie[:ham].add(%w[you are invited to my awesome party just click the link to rsvp])
|
53
|
+
|
54
|
+
# Is your data less than clean? We've got a tokenizer for that!
|
55
|
+
tokens = Groupie.tokenize('Please give me your password so I can haxx0r you!')
|
56
|
+
# => ["please", "give", "me", "your", "password", "so", "i", "can", "haxx0r", "you"]
|
57
|
+
groupie[:spam].add(tokens)
|
58
|
+
|
59
|
+
# So, now let's attempt to classify a text and see if it's spam or ham:
|
60
|
+
test_tokens = %w[please click the link to reset your password for our awesome product]
|
61
|
+
groupie.classify_text(test_tokens)
|
62
|
+
# => {:spam=>0.5909090909090909, :ham=>0.4090909090909091}
|
63
|
+
# As you can see, this password reset email looks a little dodgy...
|
64
|
+
# We have multiple strategies for drawing conclusions about what group it belongs to.
|
65
|
+
# The default you saw above is :sum, it weighs each word by the total sum of occurrences.
|
66
|
+
# Let's see if it looks less bad when using different classification strategies.
|
67
|
+
|
68
|
+
# Log reduces the weight of each word to the log10 of its occurrence count:
|
69
|
+
# - Count 1 is weight 0
|
70
|
+
# - Count 10 is weight 1
|
71
|
+
# - Count 100 is weight 2
|
72
|
+
groupie.classify_text(test_tokens, :log)
|
73
|
+
# => {:spam=>0.5, :ham=>0.5}
|
74
|
+
# This is even more even, most likely because it ignores all single-count words...
|
75
|
+
|
76
|
+
# Square root algorithm is less harsh, it reduces the weight of each word to the square root of the count:
|
77
|
+
# - Count 1 is weight 1
|
78
|
+
# - Count 4 is weight 2
|
79
|
+
# - Count 9 is weight 3
|
80
|
+
groupie.classify_text(test_tokens, :sqrt)
|
81
|
+
# => {:spam=>0.5909090909090909, :ham=>0.4090909090909091}
|
82
|
+
# This seems to result in the same value as :sum
|
83
|
+
|
84
|
+
# Unique uses the same weighting algorithm as the square root, but it modifies the word dictionary:
|
85
|
+
# it discards the 25% most common words, so less common words gain higher predictive power.
|
86
|
+
groupie.classify_text(test_tokens, :unique)
|
87
|
+
# => {:spam=>0.625, :ham=>0.375}
|
88
|
+
# This looks even worse for our poor password reset email.
|
89
|
+
# In case you're curious, the ignored words in this case are:
|
90
|
+
test_tokens - (test_tokens & groupie.unique_words)
|
91
|
+
# => ["please", "to", "reset", "awesome"]
|
92
|
+
# If you were classifying email, you can assume that common email headers will get ignored this way.
|
93
|
+
```
|
94
|
+
|
95
|
+
Persistence can be naively done by using YAML:
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
# Instantiate a new Groupie instance
|
99
|
+
groupie = Groupie.new
|
100
|
+
groupie[:spam].add(%w[assume you have a lot of data you care about])
|
101
|
+
|
102
|
+
require 'yaml'
|
103
|
+
yaml = YAML.dump(groupie)
|
104
|
+
loaded = YAML.safe_load(yaml, permitted_classes: [Groupie, Groupie::Group, Symbol])
|
105
|
+
```
|
106
|
+
|
107
|
+
I'm still experimenting with Groupie in [Infinity Feed](https://www.infinity-feed.com), so persistence is a Future Problem for me there. In development, I'm building (low data count) classifiers in memory and discarding them after use.
|
108
|
+
|
109
|
+
## Development
|
110
|
+
|
111
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. Rubocop is available via `bin/rubocop` with some friendly default settings.
|
112
|
+
|
113
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
114
|
+
|
115
|
+
## Contributing
|
116
|
+
|
117
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/Narnach/groupie.
|
118
|
+
|
119
|
+
## License
|
120
|
+
|
121
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|