kcna 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +30 -0
- data/lib/kcna.rb +35 -11
- data/lib/kcna/article.rb +30 -1
- data/lib/kcna/version.rb +1 -1
- metadata +30 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 66b4f906f215078226818809408690061af3561b
|
4
|
+
data.tar.gz: 91857a54a93a46e71648983199f8ce5a49afee4f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 587f36f1e416aa5374b0d607fecca5a8ffde3b79a9a7551cd1a45e48fe9760b809111bcd76a597342d046890bad02f318462ed193b19eca93342b4b7a9f8ad13
|
7
|
+
data.tar.gz: f580279a254c66920f46cf0fd67d034598acf47a4a957979aa9c6d5654da2389b515c77e418163abf5824c3d19dded16ad12d449094465374bd4b91a71529437
|
data/README.md
CHANGED
@@ -1,2 +1,32 @@
|
|
1
1
|
# kcna.rb
|
2
|
+
|
2
3
|
A Ruby gem for kcna.kp(KCNA, Korean Central News Agency)
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your Gemfile and execute `bundle`. (recommended way)
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'kcna'
|
11
|
+
```
|
12
|
+
|
13
|
+
You can also install it by `gem install kcna`.
|
14
|
+
|
15
|
+
## Usage
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
require "kcna"
|
19
|
+
|
20
|
+
kcna = KCNA.new
|
21
|
+
|
22
|
+
kcna.get_article_list.each do |article|
|
23
|
+
content = kcna.get_article(article.id).content
|
24
|
+
File.write("/path/to/directory/#{article.id}.txt", content)
|
25
|
+
end
|
26
|
+
```
|
27
|
+
|
28
|
+
See also [RubyDoc](http://www.rubydoc.info/github/hinamiyagk/kcna.rb/master).
|
29
|
+
|
30
|
+
## License
|
31
|
+
|
32
|
+
MIT
|
data/lib/kcna.rb
CHANGED
@@ -3,7 +3,9 @@ require "kcna/article"
|
|
3
3
|
require "httpclient"
|
4
4
|
require "date"
|
5
5
|
require "rexml/document"
|
6
|
+
require "oga"
|
6
7
|
|
8
|
+
# KCNA provides several methods for accessing KCNA resources.
|
7
9
|
class KCNA
|
8
10
|
KO = "kor"
|
9
11
|
EN = "eng"
|
@@ -16,19 +18,22 @@ class KCNA
|
|
16
18
|
@client = HTTPClient.new
|
17
19
|
end
|
18
20
|
|
21
|
+
private def strip_html(string)
|
22
|
+
Oga.parse_html(string).children.map(&:text).join
|
23
|
+
end
|
24
|
+
|
25
|
+
# Processes raw article content.
|
26
|
+
# This method strips HTML tags and trailing unnecessary strings.
|
19
27
|
def normalize_text(content)
|
20
|
-
|
21
|
-
patterns = ["\n", "<br>", " ", great_leader_pattern]
|
22
|
-
content.gsub(Regexp.union(patterns)) do |match|
|
28
|
+
replaced_content = content.gsub(/\n|<br>| /) do |match|
|
23
29
|
case match
|
24
30
|
when "\n", " "
|
25
31
|
""
|
26
32
|
when "<br>"
|
27
33
|
"\n"
|
28
|
-
when great_leader_pattern
|
29
|
-
$1
|
30
34
|
end
|
31
35
|
end.sub(/(---|‐‐‐)$/, "")
|
36
|
+
strip_html(replaced_content)
|
32
37
|
end
|
33
38
|
|
34
39
|
private def post(path, body, max_redirect = 3)
|
@@ -45,11 +50,14 @@ class KCNA
|
|
45
50
|
end
|
46
51
|
end
|
47
52
|
|
53
|
+
# Sets the response language by sending request to kcna.kp.
|
54
|
+
# @param lang [String] the language code. One of +KCNA::KO+, +KCNA::EN+, +KCNA::ZH+, +KCNA::RU+, +KCNA::ES+, and +KCNA::JA+.
|
48
55
|
def set_language(lang)
|
49
56
|
data = {
|
50
57
|
article_code: "", article_type_list: "", news_type_code: "", show_what: "", mediaCode: "",
|
51
58
|
lang: lang
|
52
59
|
}
|
60
|
+
# Cookies are handled automatically by httpclient
|
53
61
|
post("/kcna.user.home.retrieveHomeInfoList.kcmsf", data)
|
54
62
|
end
|
55
63
|
|
@@ -58,6 +66,11 @@ class KCNA
|
|
58
66
|
post("/kcna.user.article.retrieveArticleInfoFromArticleCode.kcmsf", data).body
|
59
67
|
end
|
60
68
|
|
69
|
+
# Fetches the article by article ID.
|
70
|
+
# The content of the article is already processed by {#normalize_text},
|
71
|
+
# so you don't have to do it by yourself.
|
72
|
+
# @param article_id [String] article ID.
|
73
|
+
# @return [KCNA::Article] the article data
|
61
74
|
def get_article(article_id)
|
62
75
|
doc = REXML::Document.new(fetch_article(article_id))
|
63
76
|
container = REXML::XPath.first(doc, "//NData")
|
@@ -74,20 +87,31 @@ class KCNA
|
|
74
87
|
music_count = REXML::XPath.first(doc, "//fMusicCnt").text.to_i
|
75
88
|
|
76
89
|
Article.new(
|
77
|
-
|
90
|
+
article_id, content: content,
|
78
91
|
date: date,
|
79
92
|
main_title: main_title, sub_title: sub_title, display_title: display_title,
|
80
93
|
movie_count: movie_count, photo_count: photo_count, music_count: music_count
|
81
94
|
)
|
82
95
|
end
|
83
96
|
|
84
|
-
private def fetch_article_list(start, news_type, from_date, to_date)
|
85
|
-
data = { page_start: start, kwDispTitle:
|
97
|
+
private def fetch_article_list(start, news_type, from_date, to_date, title_keyword, content_keyword)
|
98
|
+
data = { page_start: start, kwDispTitle: title_keyword, keyword: "", newsTypeCode: news_type, articleTypeList: "", photoCount: 0, movieCount: 0, kwContent: content_keyword, fromDate: from_date, toDate: to_date }
|
86
99
|
post("/kcna.user.article.retrieveArticleListForPage.kcmsf", data).body
|
87
100
|
end
|
88
101
|
|
89
|
-
|
90
|
-
|
102
|
+
# Fetches a list of articles.
|
103
|
+
# @param start [Integer] Index number for pagination.
|
104
|
+
# @param news_type [String] news type.
|
105
|
+
# @param from_date [Date, String] This method searches for articles after this date.
|
106
|
+
# @param to_date [Date, String] This method searches for articles before this date.
|
107
|
+
# @param title_keyword [String] search keyword for title.
|
108
|
+
# @param content_keyword [String] keyword for full-text search of the articles.
|
109
|
+
# @return [Array<KCNA::Article>] article list
|
110
|
+
def get_article_list(start = 0, news_type: "", from_date: "", to_date: "", title_keyword: "", content_keyword: "")
|
111
|
+
from_date = from_date.to_s unless from_date.kind_of?(String)
|
112
|
+
to_date = to_date.to_s unless to_date.kind_of?(String)
|
113
|
+
|
114
|
+
doc = REXML::Document.new(fetch_article_list(start, news_type, from_date, to_date, title_keyword, content_keyword))
|
91
115
|
article_ids = REXML::XPath.match(doc, "//articleCode").map(&:text)
|
92
116
|
disp_titles = REXML::XPath.match(doc, "//dispTitle").map { |node| normalize_text(node.text) }
|
93
117
|
main_titles = REXML::XPath.match(doc, "//mainTitle").map { |node| normalize_text(node.text) }
|
@@ -103,7 +127,7 @@ class KCNA
|
|
103
127
|
).map do |id, disp, main, sub, date, movie, music, photo|
|
104
128
|
date = "2015-04-02" if id == "AR0060168"
|
105
129
|
Article.new(
|
106
|
-
id
|
130
|
+
id, date: Date.parse(date),
|
107
131
|
display_title: disp, main_title: main, sub_title: sub,
|
108
132
|
movie_count: movie, music_count: music, photo_count: photo
|
109
133
|
)
|
data/lib/kcna/article.rb
CHANGED
@@ -1,20 +1,49 @@
|
|
1
|
+
# Represents an article.
|
1
2
|
class KCNA::Article
|
3
|
+
# @return [String] ID of the article
|
2
4
|
attr_reader :id
|
3
5
|
|
4
6
|
def self.attr_reader_hash(key, default = nil)
|
5
7
|
define_method(key, -> { @attrs[key].nil? ? default : @attrs[key] })
|
6
8
|
end
|
9
|
+
private_class_method :attr_reader_hash
|
7
10
|
|
11
|
+
# @!attribute [r]
|
12
|
+
# @return [Date] date of the article
|
8
13
|
attr_reader_hash :date
|
14
|
+
# @!attribute [r]
|
15
|
+
# @return [String] content of the article
|
9
16
|
attr_reader_hash :content
|
17
|
+
# @!attribute [r]
|
18
|
+
# @return [String] main title of the article
|
10
19
|
attr_reader_hash :main_title
|
20
|
+
# @!attribute [r]
|
21
|
+
# @return [String] subtitle of the article
|
11
22
|
attr_reader_hash :sub_title, ""
|
23
|
+
# @!attribute [r]
|
24
|
+
# @return [String]
|
12
25
|
attr_reader_hash :display_title
|
26
|
+
# @!attribute [r]
|
27
|
+
# @return [Integer] the number of movies related to the article
|
13
28
|
attr_reader_hash :movie_count, 0
|
29
|
+
# @!attribute [r]
|
30
|
+
# @return [Integer] the number of photos related to the article
|
14
31
|
attr_reader_hash :photo_count, 0
|
32
|
+
# @!attribute [r]
|
33
|
+
# @return [Integer] the number of musics related to the article
|
15
34
|
attr_reader_hash :music_count, 0
|
16
35
|
|
17
|
-
|
36
|
+
# @param [Hash] attrs attributes of article
|
37
|
+
# @option attrs [Date] :date date of the article
|
38
|
+
# @option attrs [String] :content content of the article
|
39
|
+
# @option attrs [String] :main_title main title of the article
|
40
|
+
# @option attrs [String] :sub_title subtitle of the article
|
41
|
+
# @option attrs [String] :display_title
|
42
|
+
# @option attrs [Integer] :movie_count the number of movies related to the article
|
43
|
+
# @option attrs [Integer] :photo_count the number of photos related to the article
|
44
|
+
# @option attrs [Integer] :music_count the number of musics related to the article
|
45
|
+
def initialize(id, **attrs)
|
46
|
+
raise "id is not a string" unless id.kind_of?(String)
|
18
47
|
@id = id
|
19
48
|
@attrs = attrs
|
20
49
|
end
|
data/lib/kcna/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kcna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hinata
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-07-
|
11
|
+
date: 2017-07-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httpclient
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.8'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: oga
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.10'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.10'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +94,20 @@ dependencies:
|
|
80
94
|
- - "~>"
|
81
95
|
- !ruby/object:Gem::Version
|
82
96
|
version: '0.10'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: yard
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
83
111
|
description:
|
84
112
|
email:
|
85
113
|
- syobon.hinata.public@gmail.com
|