kcna 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6e8215fbfaf2abdef2e8fd42f3e9b11cce17b762
4
+ data.tar.gz: b8214218bb1104dba3529e3c20c610aa1aa72804
5
+ SHA512:
6
+ metadata.gz: 8e1d84e5676bacc5a107cc4ad29680eced2c56a1cf8120bd4a3af8c6783d216a9942d9780c3262c4fdb5c1c11b535af97b85c3d72dd66d91d4a81bcafc2127dc
7
+ data.tar.gz: 12a957382d0fcfa5d207d5788fd5af82ab974f34d9a16eacf233b556722fb34e88cb456628e5104ec3fd9332bf010423d6120a3833432478a3d3e4f2b821f179
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017 Hinata
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # kcna.rb
2
+ A Ruby gem for kcna.kp (KCNA, Korean Central News Agency)
data/lib/kcna.rb ADDED
@@ -0,0 +1,112 @@
1
+ require "kcna/version"
2
+ require "kcna/article"
3
+ require "httpclient"
4
+ require "date"
5
+ require "rexml/document"
6
+
7
# Client for the kcna.kp (Korean Central News Agency) web service.
#
# Every request is an HTTP POST to one of the site's `.kcmsf` endpoints; the
# XML replies are parsed with REXML and converted into KCNA::Article objects.
class KCNA
  # Language codes. Presumably these are the values to hand to #set_language
  # (TODO confirm against the service).
  KO = "kor"
  EN = "eng"
  ZH = "chn"
  RU = "rus"
  ES = "spn"
  JA = "jpn"

  # Markup the service wraps around emphasised names. Both quantifiers are
  # bounded ([^>]* and .*?) so that two emphasised names on the same line are
  # unwrapped separately instead of being swallowed by one greedy match.
  EMPHASIS_PATTERN = %r{<nobr><strong><font[^>]*>(.*?)</font></strong></nobr>}
  # Every token that #normalize_text rewrites, tried in this order.
  NORMALIZE_PATTERN = Regexp.union("\n", "<br>", "&nbsp;", EMPHASIS_PATTERN)
  # A run of three dashes (ASCII hyphen-minus or U+2010) at the end of a line.
  TRAILING_RULE = /(---|‐‐‐)$/
  private_constant :EMPHASIS_PATTERN, :NORMALIZE_PATTERN, :TRAILING_RULE

  def initialize
    @client = HTTPClient.new
  end

  # Converts an HTML-ish fragment returned by the service into plain text.
  #
  # * raw newlines and "&nbsp;" are removed
  # * "<br>" becomes a newline
  # * emphasised-name markup is replaced by the text it wraps
  # * the first "---" / "‐‐‐" found at the end of a line is removed
  #
  # @param content [String, nil] raw text; nil is what REXML reports for an
  #   empty element
  # @return [String, nil] the cleaned text, or nil when +content+ is nil
  def normalize_text(content)
    return nil if content.nil?

    cleaned = content.gsub(NORMALIZE_PATTERN) do |token|
      case token
      when "\n", "&nbsp;" then ""
      when "<br>" then "\n"
      else Regexp.last_match(1) # emphasis markup: keep only the wrapped text
      end
    end
    cleaned.sub(TRAILING_RULE, "")
  end

  # POSTs +body+ to +path+ on kcna.kp and returns the successful response.
  #
  # A 307 (temporary redirect) answer causes the very same request to be
  # issued again, at most +max_redirect+ times in total.
  #
  # @raise [RuntimeError] "Too many redirects" once the budget is used up,
  #   "Response error: <status>" for any other non-successful status
  private def post(path, body, max_redirect = 3)
    raise "Too many redirects" if max_redirect.zero?

    res = @client.post("http://kcna.kp#{path}", body: body)
    return res if res.ok?

    unless res.redirect? && res.status == HTTP::Status::TEMPORARY_REDIRECT
      raise "Response error: #{res.status}"
    end

    post(path, body, max_redirect - 1)
  end

  # Sends +lang+ to the home-info endpoint.
  # NOTE(review): presumably this switches the language of later replies for
  # this client's session — confirm against the service.
  #
  # @param lang [String] language code
  # @return the HTTP response of the request
  def set_language(lang)
    data = {
      article_code: "", article_type_list: "", news_type_code: "", show_what: "", mediaCode: "",
      lang: lang
    }
    post("/kcna.user.home.retrieveHomeInfoList.kcmsf", data)
  end

  # Returns the raw XML body describing the article +article_id+.
  private def fetch_article(article_id)
    data = { article_code: article_id, kwContent: "" }
    post("/kcna.user.article.retrieveArticleInfoFromArticleCode.kcmsf", data).body
  end

  # Text of the first element matching +xpath+ (nil when that element is
  # empty). Raises NoMethodError when no element matches at all.
  private def first_text(doc, xpath)
    REXML::XPath.first(doc, xpath).text
  end

  # Fetches a single article and builds an Article from the reply.
  #
  # @param article_id [String] article code to look up
  # @return [Article]
  # @raise [RuntimeError] "Article not found" when the reply has no <NData>
  #   element or that element has no child elements
  def get_article(article_id)
    doc = REXML::Document.new(fetch_article(article_id))
    container = REXML::XPath.first(doc, "//NData")
    raise "Article not found" if container.nil? || container.elements.empty?

    Article.new(
      # The id reported by the service is used rather than the argument.
      id: first_text(doc, "//articleCode"),
      content: normalize_text(first_text(doc, "//content")),
      date: Date.strptime(first_text(doc, "//articleCreateDate"), "%Y.%m.%d"),
      main_title: normalize_text(first_text(doc, "//mainTitle")),
      sub_title: normalize_text(first_text(doc, "//subTitle")),
      display_title: normalize_text(first_text(doc, "//dispTitle")),
      movie_count: first_text(doc, "//fMovieCnt").to_i,
      photo_count: first_text(doc, "//fPhotoCnt").to_i,
      music_count: first_text(doc, "//fMusicCnt").to_i
    )
  end

  # Returns the raw XML body of one page of the article list.
  private def fetch_article_list(start, news_type, from_date, to_date)
    data = {
      page_start: start, kwDispTitle: "", keyword: "", newsTypeCode: news_type,
      articleTypeList: "", photoCount: 0, movieCount: 0, kwContent: "",
      fromDate: from_date, toDate: to_date
    }
    post("/kcna.user.article.retrieveArticleListForPage.kcmsf", data).body
  end

  # Fetches one page of the article list.
  #
  # @param start [Integer] value sent as +page_start+
  # @param news_type [String] value sent as +newsTypeCode+
  # @param from_date [String] value sent as +fromDate+
  # @param to_date [String] value sent as +toDate+
  # @return [Array<Article>] articles without +content+
  def get_article_list(start = 0, news_type: "", from_date: "", to_date: "")
    doc = REXML::Document.new(fetch_article_list(start, news_type, from_date, to_date))
    texts = ->(xpath) { REXML::XPath.match(doc, xpath).map(&:text) }
    titles = ->(xpath) { texts.call(xpath).map { |text| normalize_text(text) } }
    counts = ->(xpath) { texts.call(xpath).map(&:to_i) }

    # NOTE(review): the columns are paired purely by position. If the service
    # ever omits one of these elements for an article, every later row shifts
    # — confirm that each field is always present.
    rows = texts.call("//articleCode").zip(
      titles.call("//dispTitle"), titles.call("//mainTitle"), titles.call("//subTitle"),
      texts.call("//sendInfo"),
      counts.call("//fMovieCnt"), counts.call("//fMusicCnt"), counts.call("//fPhotoCnt")
    )

    rows.map do |id, disp, main, sub, date, movie, music, photo|
      # Hard-coded date for this one article; presumably the service reports
      # an unusable value for it — TODO confirm.
      date = "2015-04-02" if id == "AR0060168"
      Article.new(
        id: id, date: Date.parse(date),
        display_title: disp, main_title: main, sub_title: sub,
        movie_count: movie, music_count: music, photo_count: photo
      )
    end
  end
end
@@ -0,0 +1,21 @@
1
# Nested (rather than `class KCNA::Article`) so that this file can be loaded
# even when KCNA has not been defined yet.
class KCNA
  # One KCNA article: an id plus a bag of optional attributes.
  class Article
    # @return the article id given to the constructor
    attr_reader :id

    # Defines a reader named +key+ that looks the value up in the attribute
    # hash and falls back to +default+ when the value is missing or nil.
    #
    # @param key [Symbol] attribute (and method) name
    # @param default [Object] value returned when the attribute is nil
    def self.attr_reader_hash(key, default = nil)
      define_method(key) do
        value = @attrs[key]
        value.nil? ? default : value
      end
    end

    attr_reader_hash :date
    attr_reader_hash :content
    attr_reader_hash :main_title
    attr_reader_hash :sub_title, ""
    attr_reader_hash :display_title
    attr_reader_hash :movie_count, 0
    attr_reader_hash :photo_count, 0
    attr_reader_hash :music_count, 0

    # @param id the article id
    # @param attrs [Hash] any further keyword arguments; those with a reader
    #   declared above are exposed through it
    def initialize(id:, **attrs)
      @id = id
      @attrs = attrs
    end
  end
end
@@ -0,0 +1,3 @@
1
class KCNA
  # Version of the kcna gem. Frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kcna
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Hinata
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-07-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: httpclient
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.8'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.8'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.15'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.15'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '5.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '5.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.10'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.10'
83
+ description:
84
+ email:
85
+ - syobon.hinata.public@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - LICENSE
91
+ - README.md
92
+ - lib/kcna.rb
93
+ - lib/kcna/article.rb
94
+ - lib/kcna/version.rb
95
+ homepage: https://github.com/hinamiyagk/kcna.rb
96
+ licenses:
97
+ - MIT
98
+ metadata: {}
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ requirements: []
114
+ rubyforge_project:
115
+ rubygems_version: 2.6.12
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: A Ruby gem for kcna.kp(KCNA, Korean Central News Agency)
119
+ test_files: []