xbrlware-ruby19 1.1.2.19.1 → 1.1.2.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/xbrlware-ruby19.rb
CHANGED
@@ -17,8 +17,9 @@
|
|
17
17
|
# See the License for the specific language governing permissions and
|
18
18
|
# limitations under the License.
|
19
19
|
#
|
20
|
-
require 'rubygems'
|
21
20
|
require 'xmlsimple'
|
21
|
+
require 'rexml/document'
|
22
|
+
require 'rexml/streamlistener'
|
22
23
|
|
23
24
|
require 'date'
|
24
25
|
require 'bigdecimal'
|
@@ -58,8 +59,8 @@ require 'xbrlware-ruby19/linkbase/calculation_linkbase'
|
|
58
59
|
require 'xbrlware-ruby19/linkbase/definition_linkbase'
|
59
60
|
require 'xbrlware-ruby19/linkbase/presentation_linkbase'
|
60
61
|
|
61
|
-
require 'xbrlware-ruby19/edgar_data_downloader'
|
62
62
|
require 'xbrlware-ruby19/edgar_util'
|
63
|
+
require 'xbrlware-ruby19/edgar_data_downloader'
|
63
64
|
|
64
65
|
require 'logger'
|
65
66
|
require 'benchmark'
|
@@ -17,110 +17,112 @@
|
|
17
17
|
# See the License for the specific language governing permissions and
|
18
18
|
# limitations under the License.
|
19
19
|
#
|
20
|
-
module
|
20
|
+
module Xbrlware
|
21
|
+
module Edgar
|
22
|
+
|
23
|
+
# This class defines methods to download XBRL files from the SEC's XBRL RSS feed.
|
24
|
+
# See {report generation xbrlware wiki}[http://code.google.com/p/xbrlware/wiki/ReportGeneration] for how to use this class.
|
25
|
+
class RSSFeedDownloader
|
26
|
+
include ::Xbrlware::Edgar::FileUtil
|
21
27
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
mkdir(data_dir)
|
47
|
-
files.each do |file|
|
48
|
-
file_content=open(file["edgar:url"]).read
|
49
|
-
dump_to_file(data_dir+File::SEPARATOR+file["edgar:file"], file_content)
|
28
|
+
attr_reader :content
|
29
|
+
|
30
|
+
def initialize(sec_edgar_rss_file=nil)
|
31
|
+
sec_edgar_rss_file ||= "http://www.sec.gov/Archives/edgar/usgaap.rss.xml"
|
32
|
+
@content = XmlSimple.xml_in(open(sec_edgar_rss_file).read, {'ForceContent' => true})
|
33
|
+
end
|
34
|
+
|
35
|
+
# Takes limit (how many entities to download), download_to (where to download)
|
36
|
+
# default value for limit is 100
|
37
|
+
# default value for download_to is current_dir + "/edgar_data"
|
38
|
+
def download(limit=100, download_to=File.expand_path(".")+File::SEPARATOR+"edgar_data")
|
39
|
+
items=@content["channel"][0]["item"]
|
40
|
+
items.each_with_index do |item, index|
|
41
|
+
break if index==limit
|
42
|
+
files=get_xbrl_files(item)
|
43
|
+
download_to += File::SEPARATOR unless download_to.end_with?(File::SEPARATOR)
|
44
|
+
data_dir=download_to
|
45
|
+
data_dir=data_dir+File::SEPARATOR+item["xbrlFiling"][0]["cikNumber"][0]["content"]
|
46
|
+
data_dir=data_dir+File::SEPARATOR+item["xbrlFiling"][0]["accessionNumber"][0]["content"]
|
47
|
+
FileUtils.mkdir_p(data_dir)
|
48
|
+
files.each do |file|
|
49
|
+
file_content=open(file["edgar:url"]).read
|
50
|
+
dump_to_file(data_dir+File::SEPARATOR+file["edgar:file"], file_content)
|
51
|
+
end
|
50
52
|
end
|
51
53
|
end
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
54
|
+
|
55
|
+
def print_stat # :nodoc:
|
56
|
+
i_url=""
|
57
|
+
i_size=0
|
58
|
+
title=""
|
59
|
+
items=@content["channel"][0]["item"]
|
60
|
+
items.each do |item|
|
61
|
+
files=get_xbrl_files(item)
|
62
|
+
files.each do |file|
|
63
|
+
if file["type"]=="EX-101.INS" && (i_size==0 || file["size"].to_i < i_size)
|
64
|
+
i_size = file["edgar:size"].to_i
|
65
|
+
i_url = file["edgar:url"]
|
66
|
+
title = item["edgar:title"]
|
67
|
+
end
|
66
68
|
end
|
67
69
|
end
|
70
|
+
puts ""
|
71
|
+
puts " Smallest Instance File " + i_url
|
68
72
|
end
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
def get_xbrl_files(item)
|
76
|
-
xbrl_files=item["xbrlFiling"][0]["xbrlFiles"][0]["xbrlFile"]
|
77
|
-
return xbrl_files.select {|e| e["edgar:url"].end_with?("xml") || e["edgar:url"].end_with?("xsd")}
|
78
|
-
end
|
79
|
-
|
80
|
-
end
|
81
|
-
|
82
|
-
# This class defines method to download XBRL files from SEC's EDGAR filling url.
|
83
|
-
# See {report generation xbrlware wiki}[http://code.google.com/p/xbrlware/wiki/ReportGeneration] for how to use this class.
|
84
|
-
class HTMLFeedDownloader
|
85
|
-
include REXML::StreamListener
|
86
|
-
include FileUtil
|
87
|
-
|
88
|
-
attr_reader :links
|
89
|
-
|
90
|
-
# Takes url and download_to (where to download)
|
91
|
-
# default value for download_to is current_dir
|
92
|
-
def download(url, download_to=File.expand_path(".")+File::SEPARATOR)
|
93
|
-
$LOG.info " Starting download of fillings from SEC url ["+url+"]"
|
94
|
-
files=[]
|
95
|
-
content = open(url).read
|
96
|
-
@links = Set.new
|
97
|
-
uri=URI(url)
|
98
|
-
@base_path=""
|
99
|
-
@base_path=(uri.scheme+"://"+uri.host+((uri.port==80 && "") || ":"+uri.port.to_s)) unless uri.host.nil?
|
100
|
-
parse(content)
|
101
|
-
download_to += File::SEPARATOR unless download_to.end_with?(File::SEPARATOR)
|
102
|
-
mkdir(download_to)
|
103
|
-
@links.each do |link|
|
104
|
-
file=download_to + link.split("/")[-1]
|
105
|
-
dump_to_file(file, open(link).read)
|
106
|
-
files << file
|
107
|
-
end unless uri.host.nil?
|
108
|
-
files
|
109
|
-
end
|
110
|
-
|
111
|
-
# Callback method for notifying start of xml elements by REXML stream parser.
|
112
|
-
def tag_start(name, attrs) # :nodoc:
|
113
|
-
if "a"==name
|
114
|
-
href=attrs["href"]
|
115
|
-
@links << @base_path + href if href.end_with?("xml") || href.end_with?("xsd")
|
73
|
+
|
74
|
+
private
|
75
|
+
# Selects the files of an item whose url ends with xml or xsd
|
76
|
+
def get_xbrl_files(item)
|
77
|
+
xbrl_files=item["xbrlFiling"][0]["xbrlFiles"][0]["xbrlFile"]
|
78
|
+
return xbrl_files.select {|e| e["edgar:url"].end_with?("xml") || e["edgar:url"].end_with?("xsd")}
|
116
79
|
end
|
80
|
+
|
117
81
|
end
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
82
|
+
|
83
|
+
# This class defines methods to download XBRL files from an SEC EDGAR filing url.
|
84
|
+
# See {report generation xbrlware wiki}[http://code.google.com/p/xbrlware/wiki/ReportGeneration] for how to use this class.
|
85
|
+
class HTMLFeedDownloader
|
86
|
+
include REXML::StreamListener
|
87
|
+
include ::Xbrlware::Edgar::FileUtil
|
88
|
+
|
89
|
+
attr_reader :links
|
90
|
+
|
91
|
+
# Takes url and download_to (where to download)
|
92
|
+
# default value for download_to is current_dir
|
93
|
+
def download(url, download_to=File.expand_path(".")+File::SEPARATOR)
|
94
|
+
$LOG.info " Starting download of fillings from SEC url ["+url+"]"
|
95
|
+
files=[]
|
96
|
+
content = open(url).read
|
97
|
+
@links = Set.new
|
98
|
+
uri=URI(url)
|
99
|
+
@base_path=""
|
100
|
+
@base_path=(uri.scheme+"://"+uri.host+((uri.port==80 && "") || ":"+uri.port.to_s)) unless uri.host.nil?
|
101
|
+
parse(content)
|
102
|
+
download_to += File::SEPARATOR unless download_to.end_with?(File::SEPARATOR)
|
103
|
+
FileUtils.mkdir_p(download_to)
|
104
|
+
@links.each do |link|
|
105
|
+
file=download_to + link.split("/")[-1]
|
106
|
+
dump_to_file(file, open(link).read)
|
107
|
+
files << file
|
108
|
+
end unless uri.host.nil?
|
109
|
+
files
|
110
|
+
end
|
111
|
+
|
112
|
+
# Callback invoked by the REXML stream parser at the start of each xml element.
|
113
|
+
def tag_start(name, attrs) # :nodoc:
|
114
|
+
if "a"==name
|
115
|
+
href=attrs["href"]
|
116
|
+
@links << @base_path + href if href.end_with?("xml") || href.end_with?("xsd")
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
private
|
121
|
+
def parse(text)
|
122
|
+
REXML::Document.parse_stream(text, self)
|
123
|
+
end
|
124
|
+
|
122
125
|
end
|
123
|
-
|
126
|
+
|
124
127
|
end
|
125
|
-
|
126
128
|
end
|
@@ -17,14 +17,16 @@
|
|
17
17
|
# See the License for the specific language governing permissions and
|
18
18
|
# limitations under the License.
|
19
19
|
#
|
20
|
-
module
|
21
|
-
module
|
22
|
-
|
23
|
-
|
20
|
+
module Xbrlware
|
21
|
+
module Edgar
|
22
|
+
module FileUtil # :nodoc:
|
23
|
+
#def mkdir(dir)
|
24
|
+
# File.makedirs(dir) unless File.directory?(dir)
|
25
|
+
#end
|
26
|
+
|
27
|
+
def dump_to_file(file, content)
|
28
|
+
File.open(file, 'w') {|f| f.write(content) }
|
29
|
+
end
|
24
30
|
end
|
25
|
-
|
26
|
-
def dump_to_file(file, content)
|
27
|
-
File.open(file, 'w') {|f| f.write(content) }
|
28
|
-
end
|
29
31
|
end
|
30
|
-
end
|
32
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xbrlware-ruby19
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.2.19.
|
4
|
+
version: 1.1.2.19.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -14,7 +14,7 @@ default_executable:
|
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: xml-simple
|
17
|
-
requirement: &
|
17
|
+
requirement: &71299010 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *71299010
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: bigdecimal
|
28
|
-
requirement: &
|
28
|
+
requirement: &71298800 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *71298800
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: logger
|
39
|
-
requirement: &
|
39
|
+
requirement: &71298590 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
version: '0'
|
45
45
|
type: :runtime
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *71298590
|
48
48
|
description: Re-packaging of xbrlware for ruby19
|
49
49
|
email:
|
50
50
|
- jim.lindstrom@gmail.com
|