chinese_phrases 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -2
- data/README.md +30 -6
- data/bin/chinese_phrases +14 -1
- data/chinese_phrases.gemspec +2 -2
- data/lib/chinese_phrases.rb +22 -15
- data/lib/chinese_phrases/version.rb +1 -1
- metadata +4 -4
- data/lib/README.md +0 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b8aeee89176b66fac20a7d2dc6629c855834a3b3a0aa4faddb8019723bcedadb
|
4
|
+
data.tar.gz: e23c591e951cbaaf28392a0f49311b6245cc0b1939ec60d48a0fcf53c4ded57f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 98ea76736fdbe23adc9c253caf63d59553778fb0ed809b6838fd17c13067e386eb495f46217cec0e0a8d74d1105a1f2b87aefb239d7f7b12367a06c4b167db99
|
7
|
+
data.tar.gz: '049da271d939234b9ce6e4afd96d02e6cafd93d366be9d48532166a2e7f45d463bd1a3c4184b5d88444fc124a71a3a0685513dcc429e5ebb98dcfcf765d62e61'
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
GEM
|
2
2
|
remote: https://rubygems.org/
|
3
3
|
specs:
|
4
|
-
byebug (11.0.1)
|
5
4
|
httparty (0.17.0)
|
6
5
|
mime-types (~> 3.0)
|
7
6
|
multi_xml (>= 0.5.2)
|
@@ -9,14 +8,15 @@ GEM
|
|
9
8
|
mime-types-data (~> 3.2015)
|
10
9
|
mime-types-data (3.2019.0331)
|
11
10
|
multi_xml (0.6.0)
|
11
|
+
thor (0.20.3)
|
12
12
|
tradsim (0.5.1)
|
13
13
|
|
14
14
|
PLATFORMS
|
15
15
|
ruby
|
16
16
|
|
17
17
|
DEPENDENCIES
|
18
|
-
byebug
|
19
18
|
httparty
|
19
|
+
thor
|
20
20
|
tradsim
|
21
21
|
|
22
22
|
BUNDLED WITH
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# ChinesePhrases
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
Converts a CSV of Chinese words into a CSV of example sentences for use in Anki.
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -22,7 +20,33 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
25
|
-
|
23
|
+
```
|
24
|
+
chinese_phrases export input_words.csv
|
25
|
+
```
|
26
|
+
|
27
|
+
By default, the output is written to `output_phrases.csv`; use `--output-file` to choose a different path.
|
28
|
+
|
29
|
+
By default, the output is in Traditional Chinese.
|
30
|
+
|
31
|
+
```
|
32
|
+
chinese_phrases help export
|
33
|
+
|
34
|
+
Usage:
|
35
|
+
chinese_phrases export
|
36
|
+
|
37
|
+
Options:
|
38
|
+
[--output-file=OUTPUT_FILE]
|
39
|
+
[--max-len=N]
|
40
|
+
# Default: 15
|
41
|
+
[--max-per=N]
|
42
|
+
# Default: 10
|
43
|
+
[--page-size=N]
|
44
|
+
# Default: 100
|
45
|
+
[--trad], [--no-trad]
|
46
|
+
# Default: true
|
47
|
+
|
48
|
+
Output given word csv to phrases csv
|
49
|
+
```
|
26
50
|
|
27
51
|
## Development
|
28
52
|
|
@@ -32,7 +56,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
32
56
|
|
33
57
|
## Contributing
|
34
58
|
|
35
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
59
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/bendangelo/chinese_phrases. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
36
60
|
|
37
61
|
## License
|
38
62
|
|
@@ -40,4 +64,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
40
64
|
|
41
65
|
## Code of Conduct
|
42
66
|
|
43
|
-
Everyone interacting in the ChinesePhrases project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/
|
67
|
+
Everyone interacting in the ChinesePhrases project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/bendangelo/chinese_phrases/blob/master/CODE_OF_CONDUCT.md).
|
data/bin/chinese_phrases
CHANGED
@@ -1,5 +1,18 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'chinese_phrases'
|
4
|
+
require 'thor'
|
4
5
|
|
5
|
-
|
6
|
+
class CLI < Thor
|
7
|
+
option :output_file
|
8
|
+
option :max_len, type: :numeric, default: 15
|
9
|
+
option :max_per, type: :numeric, default: 10
|
10
|
+
option :page_size, type: :numeric, default: 100
|
11
|
+
option :trad, type: :boolean, default: true
|
12
|
+
desc "export", "Output given word csv to phrases csv"
|
13
|
+
def export(name)
|
14
|
+
ChinesePhrases.run name, options
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
CLI.start(ARGV)
|
data/chinese_phrases.gemspec
CHANGED
@@ -18,8 +18,8 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
19
19
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
20
20
|
end
|
21
|
-
spec.bindir = "
|
22
|
-
spec.executables =
|
21
|
+
spec.bindir = "bin"
|
22
|
+
spec.executables = ["chinese_phrases"]
|
23
23
|
spec.require_paths = ["lib"]
|
24
24
|
|
25
25
|
spec.add_development_dependency "bundler", "~> 2.0"
|
data/lib/chinese_phrases.rb
CHANGED
@@ -8,31 +8,33 @@ require "chinese_phrases/version"
|
|
8
8
|
module ChinesePhrases
|
9
9
|
|
10
10
|
class << self
|
11
|
-
def run input_file
|
11
|
+
def run input_file, options = {}
|
12
12
|
input_index = 0
|
13
|
-
output_file = "output_phrases.csv"
|
14
|
-
to_trad =
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
max_examples: 10
|
13
|
+
output_file = options[:output_file] || "output_phrases.csv"
|
14
|
+
to_trad = options[:trad]
|
15
|
+
|
16
|
+
params = {
|
17
|
+
page: options[:page] || 1, # page to check on source
|
18
|
+
page_size: options[:page_size] || 100, # number of examples from api call
|
19
|
+
max_length: options[:max_len] || 15, # only accept examples shorter than this
|
20
|
+
max_per: options[:max_per] || 10, # only accept this number of examples
|
22
21
|
}
|
23
22
|
|
24
23
|
query_list = []
|
25
24
|
total_examples = []
|
26
25
|
|
26
|
+
# read input file to create list of words to query
|
27
27
|
CSV.foreach(input_file) do |csv|
|
28
28
|
query_list << Tradsim::to_sim(csv[input_index])
|
29
29
|
end
|
30
30
|
|
31
|
+
# query each word individually and combine to total list
|
31
32
|
query_list.each do |q|
|
32
|
-
exs = get_examples q,
|
33
|
+
exs = get_examples q, params
|
33
34
|
total_examples.push *exs
|
34
35
|
end
|
35
36
|
|
37
|
+
# output each example to file
|
36
38
|
CSV.open(output_file, "w") do |csv|
|
37
39
|
total_examples.each do |a|
|
38
40
|
example = a["example"]
|
@@ -41,19 +43,24 @@ module ChinesePhrases
|
|
41
43
|
example = Tradsim::to_trad(example)
|
42
44
|
end
|
43
45
|
|
46
|
+
puts "Writing #{a["recentTrslation"]} #{example}"
|
47
|
+
|
44
48
|
csv << [a["recentTrslation"], example, a["pinyin"]]
|
45
49
|
end
|
46
50
|
end
|
51
|
+
|
52
|
+
puts "Wrote #{output_file}"
|
47
53
|
end
|
48
54
|
|
49
55
|
def get_examples query, params
|
50
56
|
query_escaped = CGI::escape query
|
57
|
+
callback = "jQuery1111013304390430117385_1567195383336"
|
51
58
|
|
52
|
-
url = "https://dict.naver.com/linedict/cnen/example/search.dict?callback=#{
|
59
|
+
url = "https://dict.naver.com/linedict/cnen/example/search.dict?callback=#{callback}&query=#{query_escaped}&page=#{params[:page]}&page_size=#{params[:page_size]}&examType=normal&fieldType=&author=&country=&ql=default&format=json&platform=isPC&_=1567195383337"
|
53
60
|
|
54
61
|
response = HTTParty.get(url)
|
55
62
|
|
56
|
-
cleaned_resp = response.match("#{
|
63
|
+
cleaned_resp = response.match("#{callback}(.*)")[1][1..-2]
|
57
64
|
data = JSON.parse cleaned_resp
|
58
65
|
|
59
66
|
exampleList = data["exampleList"]
|
@@ -61,8 +68,8 @@ module ChinesePhrases
|
|
61
68
|
# collect shortest examples
|
62
69
|
examples = exampleList.filter { |i| i["example"].length < params[:max_length] }
|
63
70
|
|
64
|
-
if params[:
|
65
|
-
examples = examples[0..params[:
|
71
|
+
if params[:max_per] > -1
|
72
|
+
examples = examples[0..params[:max_per] - 1]
|
66
73
|
end
|
67
74
|
|
68
75
|
examples
|
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chinese_phrases
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben D'Angelo
|
8
8
|
autorequire:
|
9
|
-
bindir:
|
9
|
+
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2019-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
@@ -41,7 +41,8 @@ dependencies:
|
|
41
41
|
description:
|
42
42
|
email:
|
43
43
|
- ben@bendangelo.me
|
44
|
-
executables:
|
44
|
+
executables:
|
45
|
+
- chinese_phrases
|
45
46
|
extensions: []
|
46
47
|
extra_rdoc_files: []
|
47
48
|
files:
|
@@ -57,7 +58,6 @@ files:
|
|
57
58
|
- bin/setup
|
58
59
|
- chinese_phrases.gemspec
|
59
60
|
- input_words.csv
|
60
|
-
- lib/README.md
|
61
61
|
- lib/chinese_phrases.rb
|
62
62
|
- lib/chinese_phrases/version.rb
|
63
63
|
homepage: https://github.com/bendangelo/chinese_phrases
|