jamnagar 1.3.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +3 -0
- data/.vagrant/machines/default/virtualbox/action_provision +1 -0
- data/.vagrant/machines/default/virtualbox/action_set_name +1 -0
- data/.vagrant/machines/default/virtualbox/id +1 -0
- data/.vagrant/machines/default/virtualbox/index_uuid +1 -0
- data/.vagrant/machines/default/virtualbox/private_key +27 -0
- data/.vagrant/machines/default/virtualbox/synced_folders +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +64 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +2 -0
- data/UTF-8-test.txt +0 -0
- data/Vagrantfile +67 -0
- data/blinky_tests +3 -0
- data/bootstrap.sh +36 -0
- data/jamnagar.gemspec +33 -0
- data/lib/jamnagar/adapters/adapter.rb +18 -0
- data/lib/jamnagar/adapters/file_system_adapter.rb +9 -0
- data/lib/jamnagar/adapters/mongo_adapter.rb +54 -0
- data/lib/jamnagar/adapters/persistent_store_adapter.rb +9 -0
- data/lib/jamnagar/initializers/mongo.rb +6 -0
- data/lib/jamnagar/materials/item.rb +15 -0
- data/lib/jamnagar/materials/ore.rb +29 -0
- data/lib/jamnagar/producers/producer.rb +5 -0
- data/lib/jamnagar/producers/rss_producer.rb +6 -0
- data/lib/jamnagar/producers/twitter_producer.rb +6 -0
- data/lib/jamnagar/refineries/content_refinery.rb +46 -0
- data/lib/jamnagar/refiners/contributor_detail.rb +17 -0
- data/lib/jamnagar/refiners/duplicate_detection.rb +23 -0
- data/lib/jamnagar/refiners/meta_data_extraction.rb +15 -0
- data/lib/jamnagar/refiners/popularity_incrementation.rb +22 -0
- data/lib/jamnagar/refiners/primary_key_generation.rb +20 -0
- data/lib/jamnagar/refiners/refiner.rb +22 -0
- data/lib/jamnagar/refiners/source_detail.rb +17 -0
- data/lib/jamnagar/refiners/url_expansion.rb +23 -0
- data/lib/jamnagar/refiners/utm_stripping.rb +13 -0
- data/lib/jamnagar/storage/basic_store.rb +34 -0
- data/lib/jamnagar/storage/contributor_store.rb +35 -0
- data/lib/jamnagar/storage/in_memory_cache.rb +17 -0
- data/lib/jamnagar/storage/item_store.rb +21 -0
- data/lib/jamnagar/storage/refined_item_store.rb +17 -0
- data/lib/jamnagar/storage/source_store.rb +35 -0
- data/lib/jamnagar/utilities/duplicate_detector.rb +12 -0
- data/lib/jamnagar/utilities/meta_data_extractor.rb +16 -0
- data/lib/jamnagar/utilities/popularity_incrementor.rb +20 -0
- data/lib/jamnagar/utilities/runner.rb +12 -0
- data/lib/jamnagar/utilities/silent_logger.rb +17 -0
- data/lib/jamnagar/utilities/url_expander.rb +47 -0
- data/lib/jamnagar/utilities/utm_stripper.rb +21 -0
- data/lib/jamnagar/verifiers/uniqueness_verifier.rb +16 -0
- data/lib/jamnagar/verifiers/verifier.rb +13 -0
- data/lib/jamnagar/version.rb +3 -0
- data/lib/jamnagar.rb +7 -0
- data/run.rb +49 -0
- data/sentinal +10 -0
- data/spec/basic_store_spec.rb +53 -0
- data/spec/content_refinement_spec.rb +74 -0
- data/spec/contributor_detail_refinment_spec.rb +26 -0
- data/spec/contributor_store_spec.rb +31 -0
- data/spec/duplicate_detector_spec.rb +26 -0
- data/spec/helpers.rb +92 -0
- data/spec/item_spec.rb +9 -0
- data/spec/item_store_spec.rb +18 -0
- data/spec/mongo_adapter_spec.rb +18 -0
- data/spec/popularity_incrementor_spec.rb +23 -0
- data/spec/producers_spec.rb +9 -0
- data/spec/refined_item_store_spec.rb +29 -0
- data/spec/refinements_spec.rb +118 -0
- data/spec/runner_spec.rb +8 -0
- data/spec/scenarios_spec.rb +4 -0
- data/spec/source_detail_refinment_spec.rb +24 -0
- data/spec/source_store_spec.rb +31 -0
- data/spec/spec_helper.rb +98 -0
- data/spec/url_expander_spec.rb +46 -0
- data/spec/utm_stripper_spec.rb +31 -0
- data/spec/utm_stripping_spec.rb +5 -0
- data/spec/verifications_spec.rb +22 -0
- data/tracer.rb +61 -0
- data/tweet_stream.json +1 -0
- metadata +288 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bd80d453e64fa92696458a499a24b41c4f23b034
|
4
|
+
data.tar.gz: eb46c5b99a58c17b36ccdeac7a267fa5ecc714bc
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d5c18347d0c3fb399e735e94c6295d80acfa0487c189e755a67d71ecbb4fd830c8dca21fde7a292d66a465612609f246d2d802f7e2d8da8f3de1eefa7df3ee41
|
7
|
+
data.tar.gz: 07efa6b5e57d45f0b274b7c920426c0eb7b7c383449a458423cc7a4b159d83843c573aa3589610bbca7105671bf9d3f33b42e2cdab240578964fc3f7192a742c
|
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.5:56a91893-8c48-4bb0-bbe2-bb5c6a71458e
|
@@ -0,0 +1 @@
|
|
1
|
+
1422936767
|
@@ -0,0 +1 @@
|
|
1
|
+
56a91893-8c48-4bb0-bbe2-bb5c6a71458e
|
@@ -0,0 +1 @@
|
|
1
|
+
cd630fe31aa041bc9a38f3df8739760c
|
@@ -0,0 +1,27 @@
|
|
1
|
+
-----BEGIN RSA PRIVATE KEY-----
|
2
|
+
MIIEpAIBAAKCAQEAxYZos0iMqoP7EUagdjF9RKZCxxQcIQul5IpJ9r47icgMbnux
|
3
|
+
5cPK4l44JadfO3YUWFvGQ1LOGVYGvL/F0E+m1JSWAjk1+9lLjdAwIMYRgrCmGRXp
|
4
|
+
8CWN9Ph/EMz60iLPfWYb1IflCfb4lBJVQIN3RL/Pif0mqhMg3wIou/kGHrAUHh5y
|
5
|
+
cpqMk4RCTvyLb8BcijTDgvK4GTV7AFDttIHAfSPUmVSBn5zhw4mpQqtPCeGA/aRP
|
6
|
+
qCKgpwXCzzXqY8vi24M2E2x3kM8GvLIxISSK+n+CuLcw2zVoDWmN2HnLB5parOWF
|
7
|
+
Bsc+Xf+fXFeQkPN3e3rnUQLrUNqHNsYamKaGpQIDAQABAoIBAF86icfFtmuO7cBK
|
8
|
+
eJoDCg3Kym8INveHkhc4xKf81t96XuP1JrNLTckM+6zLoJIeP3jJ5jAW/94fJ2hg
|
9
|
+
XJItlJTwfdHcmzYscMySDmH+m/qSB8IYWoy4zlPIahpDeLlOpxIh5FD2o2nGc9mP
|
10
|
+
bSKJPzsZ7ojsT3O3fSWG+PSG/nhUOdHG9luiuHToe9howKsvKkf4S11S9XXPx6gI
|
11
|
+
LIkdphcbykqMHhCX3rSNu9pblc0WunuQ7f4kHSQtmWfbxnzFPTraNk3+uban793u
|
12
|
+
ypBjN/el7/RtNHs0psoccv4YeKGGZYtYFh2Mq9w3j+1lA+rCRtWI2/jLMr+KZz1W
|
13
|
+
XXaEvwECgYEA9zbOO6Hei9l5o8UeZlWVGugSQbC+WIMNNSzpjFHK6UNRe7dvYZa0
|
14
|
+
/oZEir1EIbJDMw9Rn723zjVUuVDyykAur0fL6v+0OWuLnKMh7Icokv/41ZOtQCtD
|
15
|
+
zf4XPu+vEYt0LTYuiw9e8QB2U0cc7gDaVxh31+FNGjlBj1yspG2+qZECgYEAzIuG
|
16
|
+
NOoiakEEnhPcc2+JOyiUXav5sLNQxbbBaJHqq17AvNiCXpQm/nWojBiDRkq3GdSx
|
17
|
+
Lf6wN0RDnGRj30ixPx0q88Pa7ETRBaYRcmb9fTCBGu2DNTq165yAg8fNv7lH27a1
|
18
|
+
qxG7xtwyLiEM/gX8I8qPzRTXdhCnE3d3IkDL4dUCgYEAlIgJQSrwW3Y+73bv3Oxu
|
19
|
+
ucvVrEJVGzkiJmDlsbkzARPBUWPVN/0koghqBjHRTa2dUoBRunhyhyLj1LQeLAaE
|
20
|
+
BixNCNS5pAZJy8L4DvTmG+xPxYMFBRj5lDqmHNpFhXUUSFXVgRerI4HzlfWUHCvH
|
21
|
+
A61f1AU6HhdonIpB8Ek/1QECgYArB7MdEKBa0ADDrJP06OLqwhesORCXHrIAVfkM
|
22
|
+
IZ5HnPBZCgcrFGm7oE7r4gBD2lX4pij77xdtGPxMO0ZTY7x99YnirDIzACsk0BHd
|
23
|
+
ilMLchfG7W5r1UUOIudaUb8z9Stryl0fduSU6h/YWnWcPvATK3ri/t5w5QHw83Hp
|
24
|
+
j84UXQKBgQD1rsAR0KqYwquC+LE88pmb+aMOeHp596SseJhZIvH5uzcJVkCapdGY
|
25
|
+
6ow2v7ctNIVsZnollanX/faaIlP1urAh335az3jykUZ935+gZyUytojID+fGWAXK
|
26
|
+
eDCFZ2pkqwGiYNdqbMY0JeKk2sGoSiNRuDfvvmbNxolknwSzjIZ4CQ==
|
27
|
+
-----END RSA PRIVATE KEY-----
|
@@ -0,0 +1 @@
|
|
1
|
+
{"virtualbox":{"/vagrant":{"guestpath":"/vagrant","hostpath":"/Users/clayton/Projects/jamnagar","disabled":false}}}
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
jamnagar (1.3.7)
|
5
|
+
httparty (~> 0.13)
|
6
|
+
moped (~> 2.0)
|
7
|
+
multi_json (~> 1.10)
|
8
|
+
nokogiri (~> 1.6)
|
9
|
+
require_all (~> 1.3)
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
blinky-tape-test-status (1.1.3)
|
15
|
+
serialport
|
16
|
+
bson (2.3.0)
|
17
|
+
connection_pool (2.1.2)
|
18
|
+
diff-lcs (1.2.5)
|
19
|
+
docile (1.1.5)
|
20
|
+
httparty (0.13.3)
|
21
|
+
json (~> 1.8)
|
22
|
+
multi_xml (>= 0.5.2)
|
23
|
+
json (1.8.2)
|
24
|
+
mini_portile (0.6.2)
|
25
|
+
moped (2.0.4)
|
26
|
+
bson (~> 2.2)
|
27
|
+
connection_pool (~> 2.0)
|
28
|
+
optionable (~> 0.2.0)
|
29
|
+
multi_json (1.10.1)
|
30
|
+
multi_xml (0.5.5)
|
31
|
+
nokogiri (1.6.6.2)
|
32
|
+
mini_portile (~> 0.6.0)
|
33
|
+
optionable (0.2.0)
|
34
|
+
rake (10.4.2)
|
35
|
+
require_all (1.3.2)
|
36
|
+
rspec (3.1.0)
|
37
|
+
rspec-core (~> 3.1.0)
|
38
|
+
rspec-expectations (~> 3.1.0)
|
39
|
+
rspec-mocks (~> 3.1.0)
|
40
|
+
rspec-core (3.1.7)
|
41
|
+
rspec-support (~> 3.1.0)
|
42
|
+
rspec-expectations (3.1.2)
|
43
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
44
|
+
rspec-support (~> 3.1.0)
|
45
|
+
rspec-mocks (3.1.3)
|
46
|
+
rspec-support (~> 3.1.0)
|
47
|
+
rspec-support (3.1.2)
|
48
|
+
serialport (1.3.1)
|
49
|
+
simplecov (0.9.1)
|
50
|
+
docile (~> 1.1.0)
|
51
|
+
multi_json (~> 1.0)
|
52
|
+
simplecov-html (~> 0.8.0)
|
53
|
+
simplecov-html (0.8.0)
|
54
|
+
|
55
|
+
PLATFORMS
|
56
|
+
ruby
|
57
|
+
|
58
|
+
DEPENDENCIES
|
59
|
+
blinky-tape-test-status (~> 1.1)
|
60
|
+
bundler (~> 1.7)
|
61
|
+
jamnagar!
|
62
|
+
rake (~> 10.0)
|
63
|
+
rspec (~> 3.1)
|
64
|
+
simplecov (~> 0.9)
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Clayton Lengel-Zigich
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Jamnagar
|
2
|
+
|
3
|
+
TODO: Write a gem description
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'jamnagar'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install jamnagar
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
TODO: Write usage instructions here
|
24
|
+
|
25
|
+
## Contributing
|
26
|
+
|
27
|
+
1. Fork it ( https://github.com/[my-github-username]/jamnagar/fork )
|
28
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
30
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/UTF-8-test.txt
ADDED
Binary file
|
data/Vagrantfile
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
# -*- mode: ruby -*-
|
2
|
+
# vi: set ft=ruby :
|
3
|
+
|
4
|
+
# All Vagrant configuration is done below. The "2" in Vagrant.configure
|
5
|
+
# configures the configuration version (we support older styles for
|
6
|
+
# backwards compatibility). Please don't change it unless you know what
|
7
|
+
# you're doing.
|
8
|
+
Vagrant.configure(2) do |config|
|
9
|
+
# The most common configuration options are documented and commented below.
|
10
|
+
# For a complete reference, please see the online documentation at
|
11
|
+
# https://docs.vagrantup.com.
|
12
|
+
|
13
|
+
# Every Vagrant development environment requires a box. You can search for
|
14
|
+
# boxes at https://atlas.hashicorp.com/search.
|
15
|
+
config.vm.box = "ubuntu/trusty64"
|
16
|
+
|
17
|
+
# Disable automatic box update checking. If you disable this, then
|
18
|
+
# boxes will only be checked for updates when the user runs
|
19
|
+
# `vagrant box outdated`. This is not recommended.
|
20
|
+
# config.vm.box_check_update = false
|
21
|
+
|
22
|
+
# Create a forwarded port mapping which allows access to a specific port
|
23
|
+
# within the machine from a port on the host machine. In the example below,
|
24
|
+
# accessing "localhost:8080" will access port 80 on the guest machine.
|
25
|
+
# config.vm.network "forwarded_port", guest: 80, host: 8080
|
26
|
+
|
27
|
+
# Create a private network, which allows host-only access to the machine
|
28
|
+
# using a specific IP.
|
29
|
+
# config.vm.network "private_network", ip: "192.168.33.10"
|
30
|
+
|
31
|
+
# Create a public network, which generally matches a bridged network.
|
32
|
+
# Bridged networks make the machine appear as another physical device on
|
33
|
+
# your network.
|
34
|
+
# config.vm.network "public_network"
|
35
|
+
|
36
|
+
# Share an additional folder to the guest VM. The first argument is
|
37
|
+
# the path on the host to the actual folder. The second argument is
|
38
|
+
# the path on the guest to mount the folder. And the optional third
|
39
|
+
# argument is a set of non-required options.
|
40
|
+
# config.vm.synced_folder "../data", "/vagrant_data"
|
41
|
+
|
42
|
+
# Provider-specific configuration so you can fine-tune various
|
43
|
+
# backing providers for Vagrant. These expose provider-specific options.
|
44
|
+
# Example for VirtualBox:
|
45
|
+
#
|
46
|
+
config.vm.provider "virtualbox" do |vb|
|
47
|
+
# Customize the amount of memory on the VM:
|
48
|
+
vb.memory = "2048"
|
49
|
+
end
|
50
|
+
#
|
51
|
+
# View the documentation for the provider you are using for more
|
52
|
+
# information on available options.
|
53
|
+
|
54
|
+
# Define a Vagrant Push strategy for pushing to Atlas. Other push strategies
|
55
|
+
# such as FTP and Heroku are also available. See the documentation at
|
56
|
+
# https://docs.vagrantup.com/v2/push/atlas.html for more information.
|
57
|
+
# config.push.define "atlas" do |push|
|
58
|
+
# push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME"
|
59
|
+
# end
|
60
|
+
|
61
|
+
# Enable provisioning with a shell script. Additional provisioners such as
|
62
|
+
# Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the
|
63
|
+
# documentation for more information about their specific syntax and use.
|
64
|
+
config.vm.provision :shell, path: "bootstrap.sh"
|
65
|
+
end
|
66
|
+
|
67
|
+
|
data/blinky_tests
ADDED
data/bootstrap.sh
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env bash
# Provisioning script for the development VM (referenced from the Vagrantfile
# via `config.vm.provision :shell, path: "bootstrap.sh"`).
#
# Installs build dependencies, compiles Ruby 2.1.5 from source, installs
# bundler, configures git, installs Node.js and builds the Passenger Apache
# module. Every step is written to run unattended: no command may prompt.

# Everything below needs root. `sudo su` inside a script only spawns a separate
# root shell and does not elevate the commands that follow it, so re-execute
# the whole script under sudo instead when not already root.
if [ "$(id -u)" -ne 0 ]; then
  exec sudo bash "$0" "$@"
fi

# Keep apt/dpkg from opening interactive configuration dialogs.
export DEBIAN_FRONTEND=noninteractive

apt-get update
apt-get upgrade -y
apt-get install -y git-core curl zlib1g-dev build-essential libssl-dev libreadline-dev libyaml-dev libsqlite3-dev sqlite3 libxml2-dev libxslt1-dev libcurl4-openssl-dev python-software-properties apache2-mpm-worker apache2-threaded-dev libapr1-dev libaprutil1-dev

# Ruby
cd
wget http://ftp.ruby-lang.org/pub/ruby/2.1/ruby-2.1.5.tar.gz
tar -xzvf ruby-2.1.5.tar.gz
cd ruby-2.1.5/
./configure
make
make install

# Gems
echo "gem: --no-ri --no-rdoc" > ~/.gemrc
gem update --system
gem install bundler

# # Git
git config --global color.ui true
git config --global user.name "Vagrant"
git config --global user.email "vagrant@localhost"
# Generate the key without prompting (-f fixes the path, -N "" sets an empty
# passphrase) and skip generation when a key already exists so that
# re-provisioning never stops at an "overwrite?" question.
if [ ! -f "$HOME/.ssh/id_rsa" ]; then
  ssh-keygen -t rsa -C "vagrant@localhost" -N "" -f "$HOME/.ssh/id_rsa"
fi

# Node
# -y answers the confirmation prompts that would otherwise block provisioning.
add-apt-repository -y ppa:chris-lea/node.js
apt-get update
apt-get install -y nodejs

# # Apache
gem install passenger --no-ri --no-rdoc

passenger-install-apache2-module -a
|
data/jamnagar.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'jamnagar/version'
|
5
|
+
|
6
|
+
# Gem specification for jamnagar. The version comes from Jamnagar::VERSION
# (loaded by the require above) and the packaged file list is taken from git.
Gem::Specification.new do |spec|
  spec.name          = "jamnagar"
  spec.version       = Jamnagar::VERSION
  spec.authors       = ["Clayton Lengel-Zigich"]
  spec.email         = ["clayton@claytonlz.com"]
  spec.summary       = %q{An extensible content refinery.}
  spec.description   = %q{Jamnagar is a library for processing content using a petroleum refinement metaphor.}
  spec.homepage      = "https://github.com/clayton/jamnagar"
  spec.license       = "MIT"

  # `git ls-files -z` separates paths with NUL bytes, hence the split on "\x0".
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.1"
  spec.add_development_dependency "blinky-tape-test-status", "~> 1.1"
  spec.add_development_dependency "simplecov", "~> 0.9"

  spec.add_runtime_dependency 'moped', "~> 2.0"
  spec.add_runtime_dependency 'require_all', "~> 1.3"
  spec.add_runtime_dependency 'multi_json', "~> 1.10"
  spec.add_runtime_dependency 'httparty', "~> 0.13"
  spec.add_runtime_dependency 'nokogiri', "~> 1.6"

end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'moped'
|
2
|
+
|
3
|
+
module Jamnagar
  module Adapters
    # Adapter that reads and writes documents in a single MongoDB collection
    # through a Moped session.
    class MongoAdapter < Adapter

      # Session used by adapters that are not given an explicit driver.
      # Created on first use and pointed at 127.0.0.1:27017.
      def self.driver
        @@driver ||= Moped::Session.new(["127.0.0.1:27017"])
      end

      # driver     - session to use; defaults to MongoAdapter.driver.
      # database   - database name handed to the driver's `use`.
      # collection - name of the collection all operations target.
      def initialize(driver=nil, database="test", collection="catchall")
        @driver = driver || Jamnagar::Adapters::MongoAdapter.driver
        @driver.use database
        @collection = collection
      end

      # Inserts value into the collection. The key argument is accepted but
      # not used by this adapter.
      def store(key, value)
        with_collection { |documents| documents.insert(value) }
      end

      # Returns the first document whose "_id" equals key.
      def get(key)
        with_collection { |documents| documents.find("_id" => key).first }
      end

      # Returns the query built from params without materializing it.
      def find(params={})
        with_collection { |documents| documents.find(params) }
      end

      # Returns the first document matching params.
      def find_first(params={})
        with_collection { |documents| documents.find(params).limit(1).first }
      end

      # Applies updates to the documents selected by query.
      def update(query={}, updates={})
        with_collection { |documents| documents.find(query).update(updates) }
      end

      # Applies params[:update] to the documents selected by params[:query].
      def find_and_modify(params={})
        with_collection do |documents|
          documents.find(params[:query]).update(params[:update])
        end
      end

      private

      # Runs the block against the configured collection inside a session
      # opened with the adapter's write options (safe mode, wtimeout of 5)
      # and returns whatever the session's `with` returns.
      def with_collection
        @driver.with(safe: { wtimeout: 5 }) do |session|
          yield session[@collection.to_sym]
        end
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Jamnagar
  module Materials
    # A bag of attributes backed by a hash. Refinements are merged into it and
    # it can be read like a hash or serialized to JSON.
    class Ore
      # args - initial attributes; nil is treated as an empty hash so that
      #        later merges do not fail.
      def initialize(args={})
        @attributes = args || {}
      end

      # Merges the refinement hash into the attributes; keys in refinement
      # win. A nil/false refinement is ignored.
      #
      # Returns the merged attributes, or nil when nothing was merged.
      def merge_refinement(refinement={})
        @attributes = @attributes.merge(refinement) if refinement
      end

      # Returns the attributes as a hash.
      def to_h
        @attributes.to_h
      end

      # Returns the attribute stored under key, or nil.
      def [](key)
        to_h[key]
      end

      # Returns the attribute names.
      def keys
        to_h.keys
      end

      # Serializes the attributes to a JSON string.
      #
      # The JSON generator calls `to_json(state)` on objects nested inside
      # arrays and hashes, so the generator arguments must be accepted and
      # forwarded; a zero-arity definition raises ArgumentError there.
      # A direct call without arguments still goes through JSON.dump.
      def to_json(*args)
        return JSON.dump(to_h) if args.empty?
        to_h.to_json(*args)
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Jamnagar
  module Refineries
    # Pushes each item through the configured refiners and verifiers and
    # inserts the result into storage.
    class ContentRefinery
      # options - Hash of collaborators:
      #           :items     - items to refine (default: []).
      #           :refiners  - objects responding to refine(item).
      #           :verifiers - objects responding to verify(item).
      #           :storage   - object responding to insert(item).
      #           :logger    - object responding to error(message).
      #           :runner    - object whose run method is given the work block.
      def initialize(options={})
        @items = options[:items] || []
        @refiners = options[:refiners] || []
        @verifiers = options[:verifiers] || []
        @storage = options[:storage] || Storage::ItemStore.new
        @logger = options[:logger] || Jamnagar::Utilities::SilentLogger.new
        @runner = options[:runner] || Jamnagar::Utilities::Runner.new
      end

      # Converts, enriches, verifies and stores every item. An insert failure
      # is logged and does not stop the remaining items.
      #
      # Returns the items collection.
      def refine
        @items.each do |item|
          # Keep a handle on the refined item so the error log can report the
          # "_id" of what was actually sent to storage; refiners may add or
          # change that key, so the raw input may not carry it.
          refined = nil
          begin
            @runner.run do
              refined = verify(enrich(convert(item)))
              @storage.insert(refined)
            end
          rescue Jamnagar::Adapters::InsertError
            @logger.error("Insert Error: _id => #{(refined || item)['_id']}")
          end
        end
      end

      private

      # Feeds the item through every refiner in order, passing each refiner
      # the previous refiner's result.
      def enrich(item)
        @refiners.each do |refiner|
          item = refiner.refine(item)
        end
        item
      end

      # Feeds the item through every verifier in order, passing each verifier
      # the previous verifier's result.
      def verify(item)
        @verifiers.each do |verifier|
          item = verifier.verify(item)
        end
        item
      end

      # Wraps raw input in a Materials::Item unless it already is one.
      def convert(item)
        return Jamnagar::Materials::Item.new(item) unless item.is_a?(Jamnagar::Materials::Item)
        item
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    # Refiner that attaches contributor information looked up in a store.
    class ContributorDetail < Refiner
      # store - object responding to find_contributor(item, raw_contributor).
      def initialize(store: nil)
        @store = store
      end

      # Human-readable name of this refiner.
      def to_s
        "Contributor Detail"
      end

      # Returns a hash whose "contributor" entry is whatever the store finds
      # for the item and its raw contributor.
      def refinement_result(item)
        contributor = @store.find_contributor(item, item.raw_contributor)
        {"contributor" => contributor}
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    # Refiner that flags an item as a duplicate when the detector finds an
    # existing record for its "final_url".
    class DuplicateDetection < Refiner
      # detector - object responding to detect(url); when omitted a
      #            Utilities::DuplicateDetector is built around store.
      # store    - handed to the default detector only.
      def initialize(detector=nil, store: nil)
        @detector = detector
        @detector ||= Utilities::DuplicateDetector.new(store)
      end

      # Human-readable name of this refiner.
      def to_s
        "Duplicate Detection"
      end

      # Delegates to the parent implementation unchanged.
      def refine(item)
        super(item)
      end

      # Returns {"duplicate" => false} when the detector finds nothing,
      # otherwise marks the item as a duplicate and records the "_id" of the
      # match under "duplicate_of".
      def refinement_result(item)
        match = @detector.detect(item["final_url"])
        if match
          {"duplicate" => true, "duplicate_of" => match["_id"]}
        else
          {"duplicate" => false}
        end
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    module Twitter
      # Refiner whose result is whatever the extractor produces for the item.
      class MetaDataExtraction < Refiner
        # extractor - object responding to extract(item); defaults to a new
        #             Jamnagar::Utilities::MetaDataExtractor.
        def initialize(extractor=nil)
          @extractor = extractor
          @extractor ||= Jamnagar::Utilities::MetaDataExtractor.new
        end

        # Returns the extractor's output for item.
        def refinement_result(item)
          @extractor.extract(item)
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    # Refiner that bumps the popularity of the original record when the item
    # has been flagged as a duplicate, and gives the item a popularity of 1.
    class PopularityIncrementation < Refiner
      # incrementor - object responding to increment(id); when omitted a
      #               Utilities::PopularityIncrementor is built around store.
      # store       - handed to the default incrementor only.
      def initialize(incrementor=nil, store: nil)
        @incrementor = incrementor
        @incrementor ||= Utilities::PopularityIncrementor.new(store)
      end

      # Human-readable name of this refiner.
      def to_s
        "Popularity Incrementor"
      end

      # Delegates to the parent implementation unchanged.
      def refine(item)
        super(item)
      end

      # Increments the record named by "duplicate_of" only when "duplicate"
      # is exactly true; the returned refinement is always popularity 1.
      def refinement_result(item)
        if item["duplicate"] == true
          @incrementor.increment(item["duplicate_of"])
        end
        {"popularity" => 1}
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'digest'
|
2
|
+
|
3
|
+
module Jamnagar
  module Refiners
    # Refiner that derives an "_id" for an item by hashing its string form.
    class PrimaryKeyGeneration < Refiner
      # digester - object responding to hexdigest(string); defaults to
      #            Digest::MD5.
      def initialize(digester=nil)
        @digester = digester
        @digester ||= Digest::MD5
      end

      # Human-readable name of this refiner.
      def to_s
        "Primary Key Generation"
      end

      # Returns {"_id" => digest} where digest is the hex digest of item.to_s.
      # NOTE(review): the key is only stable if item.to_s is deterministic
      # for equal content - confirm against the item class.
      def refinement_result(item)
        {"_id" => @digester.hexdigest(item.to_s)}
      end
    end
  end
end
|