xbrlware-ruby19 1.1.2.19.1 → 1.1.2.19.2
Sign up to get free protection for your applications and to get access to all the features.
data/lib/xbrlware-ruby19.rb
CHANGED
@@ -17,8 +17,9 @@
|
|
17
17
|
# See the License for the specific language governing permissions and
|
18
18
|
# limitations under the License.
|
19
19
|
#
|
20
|
-
require 'rubygems'
|
21
20
|
require 'xmlsimple'
|
21
|
+
require 'rexml/document'
|
22
|
+
require 'rexml/streamlistener'
|
22
23
|
|
23
24
|
require 'date'
|
24
25
|
require 'bigdecimal'
|
@@ -58,8 +59,8 @@ require 'xbrlware-ruby19/linkbase/calculation_linkbase'
|
|
58
59
|
require 'xbrlware-ruby19/linkbase/definition_linkbase'
|
59
60
|
require 'xbrlware-ruby19/linkbase/presentation_linkbase'
|
60
61
|
|
61
|
-
require 'xbrlware-ruby19/edgar_data_downloader'
|
62
62
|
require 'xbrlware-ruby19/edgar_util'
|
63
|
+
require 'xbrlware-ruby19/edgar_data_downloader'
|
63
64
|
|
64
65
|
require 'logger'
|
65
66
|
require 'benchmark'
|
@@ -17,110 +17,112 @@
|
|
17
17
|
# See the License for the specific language governing permissions and
|
18
18
|
# limitations under the License.
|
19
19
|
#
|
20
|
-
module
|
20
|
+
module Xbrlware
|
21
|
+
module Edgar
|
22
|
+
|
23
|
+
# This class defines a method to download XBRL files from the SEC's XBRL RSS feed.
|
24
|
+
# See {report generation xbrlware wiki}[http://code.google.com/p/xbrlware/wiki/ReportGeneration] for how to use this class.
|
25
|
+
class RSSFeedDownloader
|
26
|
+
include ::Xbrlware::Edgar::FileUtil
|
21
27
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
mkdir(data_dir)
|
47
|
-
files.each do |file|
|
48
|
-
file_content=open(file["edgar:url"]).read
|
49
|
-
dump_to_file(data_dir+File::SEPARATOR+file["edgar:file"], file_content)
|
28
|
+
attr_reader :content
|
29
|
+
|
30
|
+
def initialize(sec_edgar_rss_file=nil)
|
31
|
+
sec_edgar_rss_file ||= "http://www.sec.gov/Archives/edgar/usgaap.rss.xml"
|
32
|
+
@content = XmlSimple.xml_in(open(sec_edgar_rss_file).read, {'ForceContent' => true})
|
33
|
+
end
|
34
|
+
|
35
|
+
# Takes limit (how many entities to download), download_to (where to download)
|
36
|
+
# default value for limit is 100
|
37
|
+
# default value for download_to is current_dir + "/edgar_data"
|
38
|
+
def download(limit=100, download_to=File.expand_path(".")+File::SEPARATOR+"edgar_data")
|
39
|
+
items=@content["channel"][0]["item"]
|
40
|
+
items.each_with_index do |item, index|
|
41
|
+
break if index==limit
|
42
|
+
files=get_xbrl_files(item)
|
43
|
+
download_to += File::SEPARATOR unless download_to.end_with?(File::SEPARATOR)
|
44
|
+
data_dir=download_to
|
45
|
+
data_dir=data_dir+File::SEPARATOR+item["xbrlFiling"][0]["cikNumber"][0]["content"]
|
46
|
+
data_dir=data_dir+File::SEPARATOR+item["xbrlFiling"][0]["accessionNumber"][0]["content"]
|
47
|
+
FileUtils.mkdir_p(data_dir)
|
48
|
+
files.each do |file|
|
49
|
+
file_content=open(file["edgar:url"]).read
|
50
|
+
dump_to_file(data_dir+File::SEPARATOR+file["edgar:file"], file_content)
|
51
|
+
end
|
50
52
|
end
|
51
53
|
end
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
54
|
+
|
55
|
+
def print_stat # :nodoc:
|
56
|
+
i_url=""
|
57
|
+
i_size=0
|
58
|
+
title=""
|
59
|
+
items=@content["channel"][0]["item"]
|
60
|
+
items.each do |item|
|
61
|
+
files=get_xbrl_files(item)
|
62
|
+
files.each do |file|
|
63
|
+
if file["type"]=="EX-101.INS" && (i_size==0 || file["size"].to_i < i_size)
|
64
|
+
i_size = file["edgar:size"].to_i
|
65
|
+
i_url = file["edgar:url"]
|
66
|
+
title = item["edgar:title"]
|
67
|
+
end
|
66
68
|
end
|
67
69
|
end
|
70
|
+
puts ""
|
71
|
+
puts " Smallest Instance File " + i_url
|
68
72
|
end
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
def get_xbrl_files(item)
|
76
|
-
xbrl_files=item["xbrlFiling"][0]["xbrlFiles"][0]["xbrlFile"]
|
77
|
-
return xbrl_files.select {|e| e["edgar:url"].end_with?("xml") || e["edgar:url"].end_with?("xsd")}
|
78
|
-
end
|
79
|
-
|
80
|
-
end
|
81
|
-
|
82
|
-
# This class defines a method to download XBRL files from an SEC EDGAR filing URL.
|
83
|
-
# See {report generation xbrlware wiki}[http://code.google.com/p/xbrlware/wiki/ReportGeneration] for how to use this class.
|
84
|
-
class HTMLFeedDownloader
|
85
|
-
include REXML::StreamListener
|
86
|
-
include FileUtil
|
87
|
-
|
88
|
-
attr_reader :links
|
89
|
-
|
90
|
-
# Takes url and download_to (where to download)
|
91
|
-
# default value for download_to is current_dir
|
92
|
-
def download(url, download_to=File.expand_path(".")+File::SEPARATOR)
|
93
|
-
$LOG.info " Starting download of fillings from SEC url ["+url+"]"
|
94
|
-
files=[]
|
95
|
-
content = open(url).read
|
96
|
-
@links = Set.new
|
97
|
-
uri=URI(url)
|
98
|
-
@base_path=""
|
99
|
-
@base_path=(uri.scheme+"://"+uri.host+((uri.port==80 && "") || ":"+uri.port.to_s)) unless uri.host.nil?
|
100
|
-
parse(content)
|
101
|
-
download_to += File::SEPARATOR unless download_to.end_with?(File::SEPARATOR)
|
102
|
-
mkdir(download_to)
|
103
|
-
@links.each do |link|
|
104
|
-
file=download_to + link.split("/")[-1]
|
105
|
-
dump_to_file(file, open(link).read)
|
106
|
-
files << file
|
107
|
-
end unless uri.host.nil?
|
108
|
-
files
|
109
|
-
end
|
110
|
-
|
111
|
-
# Callback method for notifying start of xml elements by REXML stream parser.
|
112
|
-
def tag_start(name, attrs) # :nodoc:
|
113
|
-
if "a"==name
|
114
|
-
href=attrs["href"]
|
115
|
-
@links << @base_path + href if href.end_with?("xml") || href.end_with?("xsd")
|
73
|
+
|
74
|
+
private
|
75
|
+
# Returns the item's xbrlFile entries whose "edgar:url" ends with "xml" or "xsd".
|
76
|
+
def get_xbrl_files(item)
|
77
|
+
xbrl_files=item["xbrlFiling"][0]["xbrlFiles"][0]["xbrlFile"]
|
78
|
+
return xbrl_files.select {|e| e["edgar:url"].end_with?("xml") || e["edgar:url"].end_with?("xsd")}
|
116
79
|
end
|
80
|
+
|
117
81
|
end
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
82
|
+
|
83
|
+
# This class defines a method to download XBRL files from an SEC EDGAR filing URL.
|
84
|
+
# See {report generation xbrlware wiki}[http://code.google.com/p/xbrlware/wiki/ReportGeneration] for how to use this class.
|
85
|
+
class HTMLFeedDownloader
|
86
|
+
include REXML::StreamListener
|
87
|
+
include ::Xbrlware::Edgar::FileUtil
|
88
|
+
|
89
|
+
attr_reader :links
|
90
|
+
|
91
|
+
# Takes url and download_to (where to download)
|
92
|
+
# default value for download_to is current_dir
|
93
|
+
def download(url, download_to=File.expand_path(".")+File::SEPARATOR)
|
94
|
+
$LOG.info " Starting download of fillings from SEC url ["+url+"]"
|
95
|
+
files=[]
|
96
|
+
content = open(url).read
|
97
|
+
@links = Set.new
|
98
|
+
uri=URI(url)
|
99
|
+
@base_path=""
|
100
|
+
@base_path=(uri.scheme+"://"+uri.host+((uri.port==80 && "") || ":"+uri.port.to_s)) unless uri.host.nil?
|
101
|
+
parse(content)
|
102
|
+
download_to += File::SEPARATOR unless download_to.end_with?(File::SEPARATOR)
|
103
|
+
FileUtils.mkdir_p(download_to)
|
104
|
+
@links.each do |link|
|
105
|
+
file=download_to + link.split("/")[-1]
|
106
|
+
dump_to_file(file, open(link).read)
|
107
|
+
files << file
|
108
|
+
end unless uri.host.nil?
|
109
|
+
files
|
110
|
+
end
|
111
|
+
|
112
|
+
# Callback method for notifying start of xml elements by REXML stream parser.
|
113
|
+
def tag_start(name, attrs) # :nodoc:
|
114
|
+
if "a"==name
|
115
|
+
href=attrs["href"]
|
116
|
+
@links << @base_path + href if href.end_with?("xml") || href.end_with?("xsd")
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
private
|
121
|
+
def parse(text)
|
122
|
+
REXML::Document.parse_stream(text, self)
|
123
|
+
end
|
124
|
+
|
122
125
|
end
|
123
|
-
|
126
|
+
|
124
127
|
end
|
125
|
-
|
126
128
|
end
|
@@ -17,14 +17,16 @@
|
|
17
17
|
# See the License for the specific language governing permissions and
|
18
18
|
# limitations under the License.
|
19
19
|
#
|
20
|
-
module
|
21
|
-
module
|
22
|
-
|
23
|
-
|
20
|
+
module Xbrlware
|
21
|
+
module Edgar
|
22
|
+
module FileUtil # :nodoc:
|
23
|
+
#def mkdir(dir)
|
24
|
+
# File.makedirs(dir) unless File.directory?(dir)
|
25
|
+
#end
|
26
|
+
|
27
|
+
def dump_to_file(file, content)
|
28
|
+
File.open(file, 'w') {|f| f.write(content) }
|
29
|
+
end
|
24
30
|
end
|
25
|
-
|
26
|
-
def dump_to_file(file, content)
|
27
|
-
File.open(file, 'w') {|f| f.write(content) }
|
28
|
-
end
|
29
31
|
end
|
30
|
-
end
|
32
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xbrlware-ruby19
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.2.19.1
|
4
|
+
version: 1.1.2.19.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -14,7 +14,7 @@ default_executable:
|
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: xml-simple
|
17
|
-
requirement: &
|
17
|
+
requirement: &71299010 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *71299010
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: bigdecimal
|
28
|
-
requirement: &
|
28
|
+
requirement: &71298800 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *71298800
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: logger
|
39
|
-
requirement: &
|
39
|
+
requirement: &71298590 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
version: '0'
|
45
45
|
type: :runtime
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *71298590
|
48
48
|
description: Re-packaging of xbrlware for ruby19
|
49
49
|
email:
|
50
50
|
- jim.lindstrom@gmail.com
|