jamnagar 1.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +3 -0
- data/.vagrant/machines/default/virtualbox/action_provision +1 -0
- data/.vagrant/machines/default/virtualbox/action_set_name +1 -0
- data/.vagrant/machines/default/virtualbox/id +1 -0
- data/.vagrant/machines/default/virtualbox/index_uuid +1 -0
- data/.vagrant/machines/default/virtualbox/private_key +27 -0
- data/.vagrant/machines/default/virtualbox/synced_folders +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +64 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +2 -0
- data/UTF-8-test.txt +0 -0
- data/Vagrantfile +67 -0
- data/blinky_tests +3 -0
- data/bootstrap.sh +36 -0
- data/jamnagar.gemspec +33 -0
- data/lib/jamnagar/adapters/adapter.rb +18 -0
- data/lib/jamnagar/adapters/file_system_adapter.rb +9 -0
- data/lib/jamnagar/adapters/mongo_adapter.rb +54 -0
- data/lib/jamnagar/adapters/persistent_store_adapter.rb +9 -0
- data/lib/jamnagar/initializers/mongo.rb +6 -0
- data/lib/jamnagar/materials/item.rb +15 -0
- data/lib/jamnagar/materials/ore.rb +29 -0
- data/lib/jamnagar/producers/producer.rb +5 -0
- data/lib/jamnagar/producers/rss_producer.rb +6 -0
- data/lib/jamnagar/producers/twitter_producer.rb +6 -0
- data/lib/jamnagar/refineries/content_refinery.rb +46 -0
- data/lib/jamnagar/refiners/contributor_detail.rb +17 -0
- data/lib/jamnagar/refiners/duplicate_detection.rb +23 -0
- data/lib/jamnagar/refiners/meta_data_extraction.rb +15 -0
- data/lib/jamnagar/refiners/popularity_incrementation.rb +22 -0
- data/lib/jamnagar/refiners/primary_key_generation.rb +20 -0
- data/lib/jamnagar/refiners/refiner.rb +22 -0
- data/lib/jamnagar/refiners/source_detail.rb +17 -0
- data/lib/jamnagar/refiners/url_expansion.rb +23 -0
- data/lib/jamnagar/refiners/utm_stripping.rb +13 -0
- data/lib/jamnagar/storage/basic_store.rb +34 -0
- data/lib/jamnagar/storage/contributor_store.rb +35 -0
- data/lib/jamnagar/storage/in_memory_cache.rb +17 -0
- data/lib/jamnagar/storage/item_store.rb +21 -0
- data/lib/jamnagar/storage/refined_item_store.rb +17 -0
- data/lib/jamnagar/storage/source_store.rb +35 -0
- data/lib/jamnagar/utilities/duplicate_detector.rb +12 -0
- data/lib/jamnagar/utilities/meta_data_extractor.rb +16 -0
- data/lib/jamnagar/utilities/popularity_incrementor.rb +20 -0
- data/lib/jamnagar/utilities/runner.rb +12 -0
- data/lib/jamnagar/utilities/silent_logger.rb +17 -0
- data/lib/jamnagar/utilities/url_expander.rb +47 -0
- data/lib/jamnagar/utilities/utm_stripper.rb +21 -0
- data/lib/jamnagar/verifiers/uniqueness_verifier.rb +16 -0
- data/lib/jamnagar/verifiers/verifier.rb +13 -0
- data/lib/jamnagar/version.rb +3 -0
- data/lib/jamnagar.rb +7 -0
- data/run.rb +49 -0
- data/sentinal +10 -0
- data/spec/basic_store_spec.rb +53 -0
- data/spec/content_refinement_spec.rb +74 -0
- data/spec/contributor_detail_refinment_spec.rb +26 -0
- data/spec/contributor_store_spec.rb +31 -0
- data/spec/duplicate_detector_spec.rb +26 -0
- data/spec/helpers.rb +92 -0
- data/spec/item_spec.rb +9 -0
- data/spec/item_store_spec.rb +18 -0
- data/spec/mongo_adapter_spec.rb +18 -0
- data/spec/popularity_incrementor_spec.rb +23 -0
- data/spec/producers_spec.rb +9 -0
- data/spec/refined_item_store_spec.rb +29 -0
- data/spec/refinements_spec.rb +118 -0
- data/spec/runner_spec.rb +8 -0
- data/spec/scenarios_spec.rb +4 -0
- data/spec/source_detail_refinment_spec.rb +24 -0
- data/spec/source_store_spec.rb +31 -0
- data/spec/spec_helper.rb +98 -0
- data/spec/url_expander_spec.rb +46 -0
- data/spec/utm_stripper_spec.rb +31 -0
- data/spec/utm_stripping_spec.rb +5 -0
- data/spec/verifications_spec.rb +22 -0
- data/tracer.rb +61 -0
- data/tweet_stream.json +1 -0
- metadata +288 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bd80d453e64fa92696458a499a24b41c4f23b034
|
4
|
+
data.tar.gz: eb46c5b99a58c17b36ccdeac7a267fa5ecc714bc
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d5c18347d0c3fb399e735e94c6295d80acfa0487c189e755a67d71ecbb4fd830c8dca21fde7a292d66a465612609f246d2d802f7e2d8da8f3de1eefa7df3ee41
|
7
|
+
data.tar.gz: 07efa6b5e57d45f0b274b7c920426c0eb7b7c383449a458423cc7a4b159d83843c573aa3589610bbca7105671bf9d3f33b42e2cdab240578964fc3f7192a742c
|
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.5:56a91893-8c48-4bb0-bbe2-bb5c6a71458e
|
@@ -0,0 +1 @@
|
|
1
|
+
1422936767
|
@@ -0,0 +1 @@
|
|
1
|
+
56a91893-8c48-4bb0-bbe2-bb5c6a71458e
|
@@ -0,0 +1 @@
|
|
1
|
+
cd630fe31aa041bc9a38f3df8739760c
|
@@ -0,0 +1,27 @@
|
|
1
|
+
-----BEGIN RSA PRIVATE KEY-----
|
2
|
+
MIIEpAIBAAKCAQEAxYZos0iMqoP7EUagdjF9RKZCxxQcIQul5IpJ9r47icgMbnux
|
3
|
+
5cPK4l44JadfO3YUWFvGQ1LOGVYGvL/F0E+m1JSWAjk1+9lLjdAwIMYRgrCmGRXp
|
4
|
+
8CWN9Ph/EMz60iLPfWYb1IflCfb4lBJVQIN3RL/Pif0mqhMg3wIou/kGHrAUHh5y
|
5
|
+
cpqMk4RCTvyLb8BcijTDgvK4GTV7AFDttIHAfSPUmVSBn5zhw4mpQqtPCeGA/aRP
|
6
|
+
qCKgpwXCzzXqY8vi24M2E2x3kM8GvLIxISSK+n+CuLcw2zVoDWmN2HnLB5parOWF
|
7
|
+
Bsc+Xf+fXFeQkPN3e3rnUQLrUNqHNsYamKaGpQIDAQABAoIBAF86icfFtmuO7cBK
|
8
|
+
eJoDCg3Kym8INveHkhc4xKf81t96XuP1JrNLTckM+6zLoJIeP3jJ5jAW/94fJ2hg
|
9
|
+
XJItlJTwfdHcmzYscMySDmH+m/qSB8IYWoy4zlPIahpDeLlOpxIh5FD2o2nGc9mP
|
10
|
+
bSKJPzsZ7ojsT3O3fSWG+PSG/nhUOdHG9luiuHToe9howKsvKkf4S11S9XXPx6gI
|
11
|
+
LIkdphcbykqMHhCX3rSNu9pblc0WunuQ7f4kHSQtmWfbxnzFPTraNk3+uban793u
|
12
|
+
ypBjN/el7/RtNHs0psoccv4YeKGGZYtYFh2Mq9w3j+1lA+rCRtWI2/jLMr+KZz1W
|
13
|
+
XXaEvwECgYEA9zbOO6Hei9l5o8UeZlWVGugSQbC+WIMNNSzpjFHK6UNRe7dvYZa0
|
14
|
+
/oZEir1EIbJDMw9Rn723zjVUuVDyykAur0fL6v+0OWuLnKMh7Icokv/41ZOtQCtD
|
15
|
+
zf4XPu+vEYt0LTYuiw9e8QB2U0cc7gDaVxh31+FNGjlBj1yspG2+qZECgYEAzIuG
|
16
|
+
NOoiakEEnhPcc2+JOyiUXav5sLNQxbbBaJHqq17AvNiCXpQm/nWojBiDRkq3GdSx
|
17
|
+
Lf6wN0RDnGRj30ixPx0q88Pa7ETRBaYRcmb9fTCBGu2DNTq165yAg8fNv7lH27a1
|
18
|
+
qxG7xtwyLiEM/gX8I8qPzRTXdhCnE3d3IkDL4dUCgYEAlIgJQSrwW3Y+73bv3Oxu
|
19
|
+
ucvVrEJVGzkiJmDlsbkzARPBUWPVN/0koghqBjHRTa2dUoBRunhyhyLj1LQeLAaE
|
20
|
+
BixNCNS5pAZJy8L4DvTmG+xPxYMFBRj5lDqmHNpFhXUUSFXVgRerI4HzlfWUHCvH
|
21
|
+
A61f1AU6HhdonIpB8Ek/1QECgYArB7MdEKBa0ADDrJP06OLqwhesORCXHrIAVfkM
|
22
|
+
IZ5HnPBZCgcrFGm7oE7r4gBD2lX4pij77xdtGPxMO0ZTY7x99YnirDIzACsk0BHd
|
23
|
+
ilMLchfG7W5r1UUOIudaUb8z9Stryl0fduSU6h/YWnWcPvATK3ri/t5w5QHw83Hp
|
24
|
+
j84UXQKBgQD1rsAR0KqYwquC+LE88pmb+aMOeHp596SseJhZIvH5uzcJVkCapdGY
|
25
|
+
6ow2v7ctNIVsZnollanX/faaIlP1urAh335az3jykUZ935+gZyUytojID+fGWAXK
|
26
|
+
eDCFZ2pkqwGiYNdqbMY0JeKk2sGoSiNRuDfvvmbNxolknwSzjIZ4CQ==
|
27
|
+
-----END RSA PRIVATE KEY-----
|
@@ -0,0 +1 @@
|
|
1
|
+
{"virtualbox":{"/vagrant":{"guestpath":"/vagrant","hostpath":"/Users/clayton/Projects/jamnagar","disabled":false}}}
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
jamnagar (1.3.7)
|
5
|
+
httparty (~> 0.13)
|
6
|
+
moped (~> 2.0)
|
7
|
+
multi_json (~> 1.10)
|
8
|
+
nokogiri (~> 1.6)
|
9
|
+
require_all (~> 1.3)
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
blinky-tape-test-status (1.1.3)
|
15
|
+
serialport
|
16
|
+
bson (2.3.0)
|
17
|
+
connection_pool (2.1.2)
|
18
|
+
diff-lcs (1.2.5)
|
19
|
+
docile (1.1.5)
|
20
|
+
httparty (0.13.3)
|
21
|
+
json (~> 1.8)
|
22
|
+
multi_xml (>= 0.5.2)
|
23
|
+
json (1.8.2)
|
24
|
+
mini_portile (0.6.2)
|
25
|
+
moped (2.0.4)
|
26
|
+
bson (~> 2.2)
|
27
|
+
connection_pool (~> 2.0)
|
28
|
+
optionable (~> 0.2.0)
|
29
|
+
multi_json (1.10.1)
|
30
|
+
multi_xml (0.5.5)
|
31
|
+
nokogiri (1.6.6.2)
|
32
|
+
mini_portile (~> 0.6.0)
|
33
|
+
optionable (0.2.0)
|
34
|
+
rake (10.4.2)
|
35
|
+
require_all (1.3.2)
|
36
|
+
rspec (3.1.0)
|
37
|
+
rspec-core (~> 3.1.0)
|
38
|
+
rspec-expectations (~> 3.1.0)
|
39
|
+
rspec-mocks (~> 3.1.0)
|
40
|
+
rspec-core (3.1.7)
|
41
|
+
rspec-support (~> 3.1.0)
|
42
|
+
rspec-expectations (3.1.2)
|
43
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
44
|
+
rspec-support (~> 3.1.0)
|
45
|
+
rspec-mocks (3.1.3)
|
46
|
+
rspec-support (~> 3.1.0)
|
47
|
+
rspec-support (3.1.2)
|
48
|
+
serialport (1.3.1)
|
49
|
+
simplecov (0.9.1)
|
50
|
+
docile (~> 1.1.0)
|
51
|
+
multi_json (~> 1.0)
|
52
|
+
simplecov-html (~> 0.8.0)
|
53
|
+
simplecov-html (0.8.0)
|
54
|
+
|
55
|
+
PLATFORMS
|
56
|
+
ruby
|
57
|
+
|
58
|
+
DEPENDENCIES
|
59
|
+
blinky-tape-test-status (~> 1.1)
|
60
|
+
bundler (~> 1.7)
|
61
|
+
jamnagar!
|
62
|
+
rake (~> 10.0)
|
63
|
+
rspec (~> 3.1)
|
64
|
+
simplecov (~> 0.9)
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Clayton Lengel-Zigich
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Jamnagar
|
2
|
+
|
3
|
+
Jamnagar is an extensible content refinery: a library for processing content using a petroleum refinement metaphor.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'jamnagar'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install jamnagar
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
TODO: Write usage instructions here
|
24
|
+
|
25
|
+
## Contributing
|
26
|
+
|
27
|
+
1. Fork it ( https://github.com/clayton/jamnagar/fork )
|
28
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
30
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/UTF-8-test.txt
ADDED
Binary file
|
data/Vagrantfile
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
# -*- mode: ruby -*-
|
2
|
+
# vi: set ft=ruby :
|
3
|
+
|
4
|
+
# All Vagrant configuration is done below. The "2" in Vagrant.configure
|
5
|
+
# configures the configuration version (we support older styles for
|
6
|
+
# backwards compatibility). Please don't change it unless you know what
|
7
|
+
# you're doing.
|
8
|
+
Vagrant.configure(2) do |config|
|
9
|
+
# The most common configuration options are documented and commented below.
|
10
|
+
# For a complete reference, please see the online documentation at
|
11
|
+
# https://docs.vagrantup.com.
|
12
|
+
|
13
|
+
# Every Vagrant development environment requires a box. You can search for
|
14
|
+
# boxes at https://atlas.hashicorp.com/search.
|
15
|
+
config.vm.box = "ubuntu/trusty64"
|
16
|
+
|
17
|
+
# Disable automatic box update checking. If you disable this, then
|
18
|
+
# boxes will only be checked for updates when the user runs
|
19
|
+
# `vagrant box outdated`. This is not recommended.
|
20
|
+
# config.vm.box_check_update = false
|
21
|
+
|
22
|
+
# Create a forwarded port mapping which allows access to a specific port
|
23
|
+
# within the machine from a port on the host machine. In the example below,
|
24
|
+
# accessing "localhost:8080" will access port 80 on the guest machine.
|
25
|
+
# config.vm.network "forwarded_port", guest: 80, host: 8080
|
26
|
+
|
27
|
+
# Create a private network, which allows host-only access to the machine
|
28
|
+
# using a specific IP.
|
29
|
+
# config.vm.network "private_network", ip: "192.168.33.10"
|
30
|
+
|
31
|
+
# Create a public network, which is generally matched to a bridged network.
|
32
|
+
# Bridged networks make the machine appear as another physical device on
|
33
|
+
# your network.
|
34
|
+
# config.vm.network "public_network"
|
35
|
+
|
36
|
+
# Share an additional folder to the guest VM. The first argument is
|
37
|
+
# the path on the host to the actual folder. The second argument is
|
38
|
+
# the path on the guest to mount the folder. And the optional third
|
39
|
+
# argument is a set of non-required options.
|
40
|
+
# config.vm.synced_folder "../data", "/vagrant_data"
|
41
|
+
|
42
|
+
# Provider-specific configuration so you can fine-tune various
|
43
|
+
# backing providers for Vagrant. These expose provider-specific options.
|
44
|
+
# Example for VirtualBox:
|
45
|
+
#
|
46
|
+
config.vm.provider "virtualbox" do |vb|
|
47
|
+
# Customize the amount of memory on the VM:
|
48
|
+
vb.memory = "2048"
|
49
|
+
end
|
50
|
+
#
|
51
|
+
# View the documentation for the provider you are using for more
|
52
|
+
# information on available options.
|
53
|
+
|
54
|
+
# Define a Vagrant Push strategy for pushing to Atlas. Other push strategies
|
55
|
+
# such as FTP and Heroku are also available. See the documentation at
|
56
|
+
# https://docs.vagrantup.com/v2/push/atlas.html for more information.
|
57
|
+
# config.push.define "atlas" do |push|
|
58
|
+
# push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME"
|
59
|
+
# end
|
60
|
+
|
61
|
+
# Enable provisioning with a shell script. Additional provisioners such as
|
62
|
+
# Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the
|
63
|
+
# documentation for more information about their specific syntax and use.
|
64
|
+
config.vm.provision :shell, path: "bootstrap.sh"
|
65
|
+
end
|
66
|
+
|
67
|
+
|
data/blinky_tests
ADDED
data/bootstrap.sh
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env bash
# Provisions the Vagrant guest: system packages, Ruby 2.1.5 built from
# source, Bundler, git defaults, Node.js and Passenger for Apache.
#
# The script is expected to run as root (Vagrant's shell provisioner is
# privileged by default). The previous `sudo su` line did not elevate the
# commands that followed it; it only started a separate root shell.
# Every command below is non-interactive so provisioning can run unattended.

apt-get update
apt-get upgrade -y
apt-get install -y git-core curl zlib1g-dev build-essential libssl-dev libreadline-dev libyaml-dev libsqlite3-dev sqlite3 libxml2-dev libxslt1-dev libcurl4-openssl-dev python-software-properties apache2-mpm-worker apache2-threaded-dev libapr1-dev libaprutil1-dev

# Ruby
cd
wget http://ftp.ruby-lang.org/pub/ruby/2.1/ruby-2.1.5.tar.gz
tar -xzvf ruby-2.1.5.tar.gz
cd ruby-2.1.5/
./configure
make
make install

# Gems
echo "gem: --no-ri --no-rdoc" > ~/.gemrc
gem update --system
gem install bundler

# Git
git config --global color.ui true
git config --global user.name "Vagrant"
git config --global user.email "vagrant@localhost"
# Generate a key only once, without prompting for a path or passphrase.
mkdir -p "$HOME/.ssh"
if [ ! -f "$HOME/.ssh/id_rsa" ]; then
  ssh-keygen -t rsa -C "vagrant@localhost" -N "" -f "$HOME/.ssh/id_rsa"
fi

# Node
add-apt-repository -y ppa:chris-lea/node.js
apt-get update
apt-get install -y nodejs

# Apache
gem install passenger --no-ri --no-rdoc

passenger-install-apache2-module -a
|
data/jamnagar.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for jamnagar.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'jamnagar/version'

Gem::Specification.new do |spec|
  spec.name          = "jamnagar"
  spec.version       = Jamnagar::VERSION
  spec.authors       = ["Clayton Lengel-Zigich"]
  spec.email         = ["clayton@claytonlz.com"]
  spec.summary       = %q{An extensible content refinery.}
  spec.description   = %q{Jamnagar is a library for processing content using a petroleum refinement metaphor.}
  spec.homepage      = "https://github.com/clayton/jamnagar"
  spec.license       = "MIT"

  # Package the files tracked by git, but never Vagrant's machine state:
  # .vagrant/ contains a generated SSH private key and host-specific paths
  # that must not be published with the gem.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.start_with?(".vagrant/") }
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.1"
  spec.add_development_dependency "blinky-tape-test-status", "~> 1.1"
  spec.add_development_dependency "simplecov", "~> 0.9"

  spec.add_runtime_dependency 'moped', "~> 2.0"
  spec.add_runtime_dependency 'require_all', "~> 1.3"
  spec.add_runtime_dependency 'multi_json', "~> 1.10"
  spec.add_runtime_dependency 'httparty', "~> 0.13"
  spec.add_runtime_dependency 'nokogiri', "~> 1.6"
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'moped'
|
2
|
+
|
3
|
+
module Jamnagar
  module Adapters
    # Storage adapter that reads and writes documents in a single MongoDB
    # collection through a Moped session.
    class MongoAdapter < Adapter

      # Returns the default session, creating it on first use and reusing it
      # afterwards. Memoized in a class-level instance variable instead of a
      # class variable so the state belongs to this class only.
      def self.driver
        @driver ||= Moped::Session.new(["127.0.0.1:27017"])
      end

      # driver     - session object to use; falls back to MongoAdapter.driver.
      # database   - name passed to the session's `use`.
      # collection - name of the collection every operation targets.
      #
      # NOTE(review): `use` is called on the session itself, so adapters that
      # share the default session presumably also share the selected
      # database — confirm this is intended.
      def initialize(driver=nil, database="test", collection="catchall")
        @driver = driver || Jamnagar::Adapters::MongoAdapter.driver
        @driver.use database
        @collection = collection
      end

      # Inserts `value` into the collection. `key` is accepted for interface
      # compatibility but is not used by this adapter.
      def store(key, value)
        with_collection { |collection| collection.insert(value) }
      end

      # Returns the first document whose "_id" equals `key`.
      def get(key)
        with_collection { |collection| collection.find("_id" => key).first }
      end

      # Returns the query object for documents matching `params`.
      def find(params={})
        with_collection { |collection| collection.find(params) }
      end

      # Returns the first document matching `params`.
      def find_first(params={})
        with_collection { |collection| collection.find(params).limit(1).first }
      end

      # Applies `updates` to the documents selected by `query`.
      def update(query={}, updates={})
        with_collection { |collection| collection.find(query).update(updates) }
      end

      # Same operation as #update, taking the query and the update document
      # from params[:query] and params[:update].
      def find_and_modify(params={})
        update(params[:query], params[:update])
      end

      private

      # Yields the adapter's collection from a session configured with the
      # write options used for every operation, and returns the block's value.
      def with_collection
        @driver.with(safe: { wtimeout: 5 }) do |session|
          yield session[@collection.to_sym]
        end
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'json'

module Jamnagar
  module Materials
    # Raw material flowing through the refinery: a thin wrapper around a
    # hash of attributes that refiners add to over time.
    class Ore
      # args - initial attributes (anything responding to merge/to_h).
      def initialize(args={})
        @attributes = args
      end

      # Merges `refinement` into the attributes and returns the merged
      # attributes. A nil/false refinement is ignored and nil is returned.
      def merge_refinement(refinement={})
        @attributes = @attributes.merge(refinement) if refinement
      end

      # Returns the attributes as a Hash.
      def to_h
        @attributes.to_h
      end

      # Looks up a single attribute.
      def [](key)
        to_h[key]
      end

      # Returns the attribute names.
      def keys
        to_h.keys
      end

      # Serializes the attributes to a JSON string.
      #
      # Accepts the optional generator-state arguments that the JSON library
      # passes when this object is nested inside another structure (for
      # example JSON.generate([ore])); without them that call raised
      # ArgumentError. A direct call with no arguments behaves as before.
      def to_json(*args)
        return JSON.dump(to_h) if args.empty?
        to_h.to_json(*args)
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Jamnagar
  module Refineries
    # Runs every item through the configured refiners and verifiers and
    # hands the result to the storage object.
    class ContentRefinery
      # options - Hash of collaborators; each falls back to a default:
      #   :items     - items to process (default [])
      #   :refiners  - objects responding to refine(item) (default [])
      #   :verifiers - objects responding to verify(item) (default [])
      #   :storage   - object responding to insert(item)
      #   :logger    - object responding to error(message)
      #   :runner    - object responding to run { ... }
      def initialize(options={})
        @items     = options[:items]     || []
        @refiners  = options[:refiners]  || []
        @verifiers = options[:verifiers] || []
        @storage   = options[:storage]   || Storage::ItemStore.new
        @logger    = options[:logger]    || Jamnagar::Utilities::SilentLogger.new
        @runner    = options[:runner]    || Jamnagar::Utilities::Runner.new
      end

      # Converts, refines, verifies and stores each item via the runner.
      # An insert failure is logged and processing continues with the next
      # item. Returns the items collection.
      def refine
        @items.each do |item|
          begin
            @runner.run do
              @storage.insert(verify(enrich(convert(item))))
            end
          rescue Jamnagar::Adapters::InsertError
            # The message uses the "_id" of the original input item.
            @logger.error("Insert Error: _id => #{item['_id']}")
          end
        end
      end

      private

      # Threads the item through every refiner in order.
      def enrich(item)
        @refiners.reduce(item) { |current, refiner| refiner.refine(current) }
      end

      # Threads the item through every verifier in order.
      def verify(item)
        @verifiers.reduce(item) { |current, verifier| verifier.verify(current) }
      end

      # Wraps raw input in a Materials::Item; existing Items pass through.
      def convert(item)
        return item if item.is_a?(Jamnagar::Materials::Item)
        Jamnagar::Materials::Item.new(item)
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    # Refiner that attaches contributor details looked up in a store.
    class ContributorDetail < Refiner
      # store - object responding to find_contributor(item, raw_contributor).
      def initialize(store: nil)
        @store = store
      end

      # Human-readable name of this refiner.
      def to_s
        "Contributor Detail"
      end

      # Returns the refinement hash holding the contributor found for the
      # item's raw contributor.
      def refinement_result(item)
        contributor = @store.find_contributor(item, item.raw_contributor)
        { "contributor" => contributor }
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    # Refiner that flags an item as a duplicate when the detector finds an
    # existing record for the item's "final_url".
    class DuplicateDetection < Refiner
      # detector - object responding to detect(url); when omitted a
      #            Utilities::DuplicateDetector is built around `store`.
      def initialize(detector=nil, store: nil)
        @detector = detector ? detector : Utilities::DuplicateDetector.new(store)
      end

      # Human-readable name of this refiner.
      def to_s
        "Duplicate Detection"
      end

      # Delegates unchanged to the parent implementation.
      def refine(item)
        super(item)
      end

      # Returns {"duplicate" => false} when nothing is detected, otherwise
      # marks the item as a duplicate of the detected record's "_id".
      def refinement_result(item)
        match = @detector.detect(item["final_url"])
        if match
          { "duplicate" => true, "duplicate_of" => match["_id"] }
        else
          { "duplicate" => false }
        end
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    module Twitter
      # Refiner whose refinement is whatever the extractor returns for the item.
      class MetaDataExtraction < Refiner
        # extractor - object responding to extract(item); defaults to a new
        #             Jamnagar::Utilities::MetaDataExtractor.
        def initialize(extractor=nil)
          @extractor = extractor ? extractor : Jamnagar::Utilities::MetaDataExtractor.new
        end

        # Returns the extractor's result for the item.
        def refinement_result(item)
          extracted = @extractor.extract(item)
          extracted
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    # Refiner that gives each item a starting popularity and, for items
    # flagged as duplicates, asks the incrementor to bump the original.
    class PopularityIncrementation < Refiner
      # incrementor - object responding to increment(id); when omitted a
      #               Utilities::PopularityIncrementor is built around `store`.
      def initialize(incrementor=nil, store: nil)
        @incrementor = incrementor ? incrementor : Utilities::PopularityIncrementor.new(store)
      end

      # Human-readable name of this refiner.
      def to_s
        "Popularity Incrementor"
      end

      # Delegates unchanged to the parent implementation.
      def refine(item)
        super(item)
      end

      # Increments the record named by "duplicate_of" only when "duplicate"
      # is exactly true; the refinement itself is always {"popularity" => 1}.
      def refinement_result(item)
        if item["duplicate"] == true
          @incrementor.increment(item["duplicate_of"])
        end
        { "popularity" => 1 }
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'digest'
|
2
|
+
|
3
|
+
module Jamnagar
  module Refiners
    # Refiner that derives the "_id" of an item from a digest of its
    # string representation.
    class PrimaryKeyGeneration < Refiner
      # digester - object responding to hexdigest(string); defaults to
      #            Digest::MD5.
      def initialize(digester=nil)
        @digester = digester ? digester : Digest::MD5
      end

      # Human-readable name of this refiner.
      def to_s
        "Primary Key Generation"
      end

      # Returns {"_id" => hex digest of item.to_s}.
      # NOTE(review): the key is only stable if the item's to_s is
      # content-based — confirm the item class overrides to_s.
      def refinement_result(item)
        { "_id" => @digester.hexdigest(item.to_s) }
      end
    end
  end
end
|