kcna 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +30 -0
- data/lib/kcna.rb +35 -11
- data/lib/kcna/article.rb +30 -1
- data/lib/kcna/version.rb +1 -1
- metadata +30 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 66b4f906f215078226818809408690061af3561b
|
4
|
+
data.tar.gz: 91857a54a93a46e71648983199f8ce5a49afee4f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 587f36f1e416aa5374b0d607fecca5a8ffde3b79a9a7551cd1a45e48fe9760b809111bcd76a597342d046890bad02f318462ed193b19eca93342b4b7a9f8ad13
|
7
|
+
data.tar.gz: f580279a254c66920f46cf0fd67d034598acf47a4a957979aa9c6d5654da2389b515c77e418163abf5824c3d19dded16ad12d449094465374bd4b91a71529437
|
data/README.md
CHANGED
@@ -1,2 +1,32 @@
|
|
1
1
|
# kcna.rb
|
2
|
+
|
2
3
|
A Ruby gem for kcna.kp (KCNA, Korean Central News Agency)
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your Gemfile and execute `bundle`. (recommended way)
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'kcna'
|
11
|
+
```
|
12
|
+
|
13
|
+
You can also install it with `gem install kcna`.
|
14
|
+
|
15
|
+
## Usage
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
require "kcna"
|
19
|
+
|
20
|
+
kcna = KCNA.new
|
21
|
+
|
22
|
+
kcna.get_article_list.each do |article|
|
23
|
+
content = kcna.get_article(article.id).content
|
24
|
+
File.write("/path/to/directory/#{article.id}.txt", content)
|
25
|
+
end
|
26
|
+
```
|
27
|
+
|
28
|
+
See also [RubyDoc](http://www.rubydoc.info/github/hinamiyagk/kcna.rb/master).
|
29
|
+
|
30
|
+
## License
|
31
|
+
|
32
|
+
MIT
|
data/lib/kcna.rb
CHANGED
@@ -3,7 +3,9 @@ require "kcna/article"
|
|
3
3
|
require "httpclient"
|
4
4
|
require "date"
|
5
5
|
require "rexml/document"
|
6
|
+
require "oga"
|
6
7
|
|
8
|
+
# KCNA provides several methods for accessing KCNA resources.
|
7
9
|
class KCNA
|
8
10
|
KO = "kor"
|
9
11
|
EN = "eng"
|
@@ -16,19 +18,22 @@ class KCNA
|
|
16
18
|
@client = HTTPClient.new
|
17
19
|
end
|
18
20
|
|
21
|
+
private def strip_html(string)
|
22
|
+
Oga.parse_html(string).children.map(&:text).join
|
23
|
+
end
|
24
|
+
|
25
|
+
# Processes raw article content.
|
26
|
+
# This method strips HTML tags and trailing unnecessary strings.
|
19
27
|
def normalize_text(content)
|
20
|
-
|
21
|
-
patterns = ["\n", "<br>", " ", great_leader_pattern]
|
22
|
-
content.gsub(Regexp.union(patterns)) do |match|
|
28
|
+
replaced_content = content.gsub(/\n|<br>| /) do |match|
|
23
29
|
case match
|
24
30
|
when "\n", " "
|
25
31
|
""
|
26
32
|
when "<br>"
|
27
33
|
"\n"
|
28
|
-
when great_leader_pattern
|
29
|
-
$1
|
30
34
|
end
|
31
35
|
end.sub(/(---|‐‐‐)$/, "")
|
36
|
+
strip_html(replaced_content)
|
32
37
|
end
|
33
38
|
|
34
39
|
private def post(path, body, max_redirect = 3)
|
@@ -45,11 +50,14 @@ class KCNA
|
|
45
50
|
end
|
46
51
|
end
|
47
52
|
|
53
|
+
# Sets the response language by sending request to kcna.kp.
|
54
|
+
# @param lang [String] the language code. One of +KCNA::KO+, +KCNA::EN+, +KCNA::ZH+, +KCNA::RU+, +KCNA::ES+, and +KCNA::JA+.
|
48
55
|
def set_language(lang)
|
49
56
|
data = {
|
50
57
|
article_code: "", article_type_list: "", news_type_code: "", show_what: "", mediaCode: "",
|
51
58
|
lang: lang
|
52
59
|
}
|
60
|
+
# Cookies are handled automatically by httpclient
|
53
61
|
post("/kcna.user.home.retrieveHomeInfoList.kcmsf", data)
|
54
62
|
end
|
55
63
|
|
@@ -58,6 +66,11 @@ class KCNA
|
|
58
66
|
post("/kcna.user.article.retrieveArticleInfoFromArticleCode.kcmsf", data).body
|
59
67
|
end
|
60
68
|
|
69
|
+
# Fetches the article by article ID.
|
70
|
+
# The content of the article is already processed by {#normalize_text},
|
71
|
+
# so you don't have to do it by yourself.
|
72
|
+
# @param article_id [String] article ID.
|
73
|
+
# @return [KCNA::Article] the article data
|
61
74
|
def get_article(article_id)
|
62
75
|
doc = REXML::Document.new(fetch_article(article_id))
|
63
76
|
container = REXML::XPath.first(doc, "//NData")
|
@@ -74,20 +87,31 @@ class KCNA
|
|
74
87
|
music_count = REXML::XPath.first(doc, "//fMusicCnt").text.to_i
|
75
88
|
|
76
89
|
Article.new(
|
77
|
-
|
90
|
+
article_id, content: content,
|
78
91
|
date: date,
|
79
92
|
main_title: main_title, sub_title: sub_title, display_title: display_title,
|
80
93
|
movie_count: movie_count, photo_count: photo_count, music_count: music_count
|
81
94
|
)
|
82
95
|
end
|
83
96
|
|
84
|
-
private def fetch_article_list(start, news_type, from_date, to_date)
|
85
|
-
data = { page_start: start, kwDispTitle:
|
97
|
+
private def fetch_article_list(start, news_type, from_date, to_date, title_keyword, content_keyword)
|
98
|
+
data = { page_start: start, kwDispTitle: title_keyword, keyword: "", newsTypeCode: news_type, articleTypeList: "", photoCount: 0, movieCount: 0, kwContent: content_keyword, fromDate: from_date, toDate: to_date }
|
86
99
|
post("/kcna.user.article.retrieveArticleListForPage.kcmsf", data).body
|
87
100
|
end
|
88
101
|
|
89
|
-
|
90
|
-
|
102
|
+
# Fetches a list of articles.
|
103
|
+
# @param start [Integer] Index number for pagination.
|
104
|
+
# @param news_type [String] news type.
|
105
|
+
# @param from_date [Date, String] This method searches for articles after this date.
|
106
|
+
# @param to_date [Date, String] This method searches for articles before this date.
|
107
|
+
# @param title_keyword [String] search keyword for title.
|
108
|
+
# @param content_keyword [String] keyword for full-text search of the articles.
|
109
|
+
# @return [Array<KCNA::Article>] article list
|
110
|
+
def get_article_list(start = 0, news_type: "", from_date: "", to_date: "", title_keyword: "", content_keyword: "")
|
111
|
+
from_date = from_date.to_s unless from_date.kind_of?(String)
|
112
|
+
to_date = to_date.to_s unless to_date.kind_of?(String)
|
113
|
+
|
114
|
+
doc = REXML::Document.new(fetch_article_list(start, news_type, from_date, to_date, title_keyword, content_keyword))
|
91
115
|
article_ids = REXML::XPath.match(doc, "//articleCode").map(&:text)
|
92
116
|
disp_titles = REXML::XPath.match(doc, "//dispTitle").map { |node| normalize_text(node.text) }
|
93
117
|
main_titles = REXML::XPath.match(doc, "//mainTitle").map { |node| normalize_text(node.text) }
|
@@ -103,7 +127,7 @@ class KCNA
|
|
103
127
|
).map do |id, disp, main, sub, date, movie, music, photo|
|
104
128
|
date = "2015-04-02" if id == "AR0060168"
|
105
129
|
Article.new(
|
106
|
-
id
|
130
|
+
id, date: Date.parse(date),
|
107
131
|
display_title: disp, main_title: main, sub_title: sub,
|
108
132
|
movie_count: movie, music_count: music, photo_count: photo
|
109
133
|
)
|
data/lib/kcna/article.rb
CHANGED
@@ -1,20 +1,49 @@
|
|
1
|
+
# Represents an article.
|
1
2
|
class KCNA::Article
|
3
|
+
# @return [String] ID of the article
|
2
4
|
attr_reader :id
|
3
5
|
|
4
6
|
def self.attr_reader_hash(key, default = nil)
|
5
7
|
define_method(key, -> { @attrs[key].nil? ? default : @attrs[key] })
|
6
8
|
end
|
9
|
+
private_class_method :attr_reader_hash
|
7
10
|
|
11
|
+
# @!attribute [r]
|
12
|
+
# @return [Date] date of the article
|
8
13
|
attr_reader_hash :date
|
14
|
+
# @!attribute [r]
|
15
|
+
# @return [String] content of the article
|
9
16
|
attr_reader_hash :content
|
17
|
+
# @!attribute [r]
|
18
|
+
# @return [String] main title of the article
|
10
19
|
attr_reader_hash :main_title
|
20
|
+
# @!attribute [r]
|
21
|
+
# @return [String] subtitle of the article
|
11
22
|
attr_reader_hash :sub_title, ""
|
23
|
+
# @!attribute [r]
|
24
|
+
# @return [String]
|
12
25
|
attr_reader_hash :display_title
|
26
|
+
# @!attribute [r]
|
27
|
+
# @return [Integer] the number of movies related to the article
|
13
28
|
attr_reader_hash :movie_count, 0
|
29
|
+
# @!attribute [r]
|
30
|
+
# @return [Integer] the number of photos related to the article
|
14
31
|
attr_reader_hash :photo_count, 0
|
32
|
+
# @!attribute [r]
|
33
|
+
# @return [Integer] the number of music items related to the article
|
15
34
|
attr_reader_hash :music_count, 0
|
16
35
|
|
17
|
-
|
36
|
+
# @param [Hash] attrs attributes of article
|
37
|
+
# @option attrs [Date] :date date of the article
|
38
|
+
# @option attrs [String] :content content of the article
|
39
|
+
# @option attrs [String] :main_title main title of the article
|
40
|
+
# @option attrs [String] :sub_title subtitle of the article
|
41
|
+
# @option attrs [String] :display_title
|
42
|
+
# @option attrs [Integer] :movie_count the number of movies related to the article
|
43
|
+
# @option attrs [Integer] :photo_count the number of photos related to the article
|
44
|
+
# @option attrs [Integer] :music_count the number of music items related to the article
|
45
|
+
def initialize(id, **attrs)
|
46
|
+
raise "id is not a string" unless id.kind_of?(String)
|
18
47
|
@id = id
|
19
48
|
@attrs = attrs
|
20
49
|
end
|
data/lib/kcna/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kcna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hinata
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-07-
|
11
|
+
date: 2017-07-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httpclient
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.8'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: oga
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.10'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.10'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +94,20 @@ dependencies:
|
|
80
94
|
- - "~>"
|
81
95
|
- !ruby/object:Gem::Version
|
82
96
|
version: '0.10'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: yard
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
83
111
|
description:
|
84
112
|
email:
|
85
113
|
- syobon.hinata.public@gmail.com
|