carrot2 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +20 -1
- data/carrot2.gemspec +0 -1
- data/lib/carrot2.rb +19 -10
- data/lib/carrot2/version.rb +1 -1
- data/test/carrot2_test.rb +10 -2
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 33734687ea74c1750a823b92eb21648910a7c11c
|
4
|
+
data.tar.gz: 3683ea29b97fa311a23919048d49b2c4c3784701
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c87a0029e7d304717778fc7fadeee896895cb412524e7fe0d435aec2f84308cbd9d6967481abd8493dff75303024bcdd467b33f7f25947931c8702f261d0f6c
|
7
|
+
data.tar.gz: 5028411dc8eb0bf3a5629824e9fba106846f2759a3c687a43fa5d19636b1cce0bc9b564daf5ab2b0e23ab2a6ca318061345584c0641db0933d7e5b834dc70e39
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -6,6 +6,13 @@ Ruby client for [Carrot2](http://project.carrot2.org/) - the open-source documen
|
|
6
6
|
|
7
7
|
First, [download and run](http://project.carrot2.org/download-dcs.html) the Carrot2 server. It’s the one on [this page](https://github.com/carrot2/carrot2/releases) that begins with `carrot2-dcs`.
|
8
8
|
|
9
|
+
With Homebrew, use:
|
10
|
+
|
11
|
+
```sh
|
12
|
+
brew install carrot2
|
13
|
+
brew services start carrot2
|
14
|
+
```
|
15
|
+
|
9
16
|
Then add this line to your application’s Gemfile:
|
10
17
|
|
11
18
|
```ruby
|
@@ -14,6 +21,8 @@ gem 'carrot2'
|
|
14
21
|
|
15
22
|
## How to Use
|
16
23
|
|
24
|
+
To cluster documents, use:
|
25
|
+
|
17
26
|
```ruby
|
18
27
|
documents = [
|
19
28
|
"Sign up for an exclusive coupon.",
|
@@ -26,7 +35,7 @@ carrot2 = Carrot2.new
|
|
26
35
|
carrot2.cluster(documents)
|
27
36
|
```
|
28
37
|
|
29
|
-
returns
|
38
|
+
This returns:
|
30
39
|
|
31
40
|
```ruby
|
32
41
|
{
|
@@ -64,6 +73,16 @@ returns
|
|
64
73
|
|
65
74
|
Documents are numbered in the order provided, starting with 0.
|
66
75
|
|
76
|
+
For other requests, use:
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
carrot2.request(
|
80
|
+
"dcs.c2stream" => xml_str
|
81
|
+
)
|
82
|
+
```
|
83
|
+
|
84
|
+
## Configuration
|
85
|
+
|
67
86
|
To specify the Carrot2 server, set `ENV["CARROT2_URL"]` or use:
|
68
87
|
|
69
88
|
```ruby
|
data/carrot2.gemspec
CHANGED
data/lib/carrot2.rb
CHANGED
@@ -1,19 +1,22 @@
|
|
1
1
|
require "carrot2/version"
|
2
2
|
require "builder"
|
3
|
-
require "
|
3
|
+
require "net/http"
|
4
4
|
require "json"
|
5
5
|
|
6
6
|
class Carrot2
|
7
|
+
class Error < StandardError; end
|
8
|
+
|
7
9
|
def initialize(url: nil)
|
8
10
|
@url = url || ENV["CARROT2_URL"] || "http://localhost:8080"
|
9
11
|
|
10
12
|
# add dcs/rest
|
11
13
|
@url = "#{@url.sub(/\/\z/, "")}/dcs/rest"
|
14
|
+
@uri = URI.parse(@url)
|
12
15
|
end
|
13
16
|
|
14
|
-
def cluster(documents,
|
17
|
+
def cluster(documents, language: "ENGLISH")
|
15
18
|
xml = Builder::XmlMarkup.new
|
16
|
-
xml.instruct! :xml, :
|
19
|
+
xml.instruct! :xml, version: "1.0", encoding: "UTF-8"
|
17
20
|
xml.searchresult do |s|
|
18
21
|
documents.each do |document|
|
19
22
|
s.document do |d|
|
@@ -22,18 +25,24 @@ class Carrot2
|
|
22
25
|
end
|
23
26
|
end
|
24
27
|
|
25
|
-
|
26
|
-
"dcs.output.format" => "JSON",
|
28
|
+
request(
|
27
29
|
"dcs.clusters.only" => true,
|
28
30
|
"dcs.c2stream" => xml.target!,
|
29
|
-
"MultilingualClustering.defaultLanguage" =>
|
31
|
+
"MultilingualClustering.defaultLanguage" => language,
|
30
32
|
multipart: true
|
31
|
-
|
32
|
-
|
33
|
-
|
33
|
+
)
|
34
|
+
end
|
35
|
+
|
36
|
+
def request(params)
|
37
|
+
response = Net::HTTP.post_form(@uri, params.merge("dcs.output.format" => "JSON"))
|
38
|
+
if response.code == "200"
|
34
39
|
JSON.parse(response.body)
|
35
40
|
else
|
36
|
-
|
41
|
+
body = response.body.to_s
|
42
|
+
# try to get reason from title
|
43
|
+
m = body.match(/<title>(.+)<\/title>/)
|
44
|
+
message = m ? m[1] : body
|
45
|
+
raise Carrot2::Error, message
|
37
46
|
end
|
38
47
|
end
|
39
48
|
end
|
data/lib/carrot2/version.rb
CHANGED
data/test/carrot2_test.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative "test_helper"
|
2
2
|
|
3
3
|
class Carrot2Test < Minitest::Test
|
4
|
-
def
|
4
|
+
def test_cluster
|
5
5
|
documents = [
|
6
6
|
"Sign up for an exclusive coupon.",
|
7
7
|
"Exclusive members get a free coupon.",
|
@@ -9,7 +9,15 @@ class Carrot2Test < Minitest::Test
|
|
9
9
|
"This is completely unrelated to the other documents."
|
10
10
|
]
|
11
11
|
|
12
|
-
carrot2 = Carrot2.new
|
13
12
|
assert_equal ["Coupon", "Exclusive", "Other Topics"], carrot2.cluster(documents)["clusters"].map { |c| c["phrases"].first }
|
14
13
|
end
|
14
|
+
|
15
|
+
def test_bad_request
|
16
|
+
error = assert_raises(Carrot2::Error) { carrot2.request({}) }
|
17
|
+
assert_includes error.message, "Error 400"
|
18
|
+
end
|
19
|
+
|
20
|
+
def carrot2
|
21
|
+
@carrot2 ||= Carrot2.new
|
22
|
+
end
|
15
23
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: carrot2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-01-
|
11
|
+
date: 2017-01-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: builder
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rest-client
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: bundler
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|