fbo 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -5
  3. data/Gemfile +0 -2
  4. data/Rakefile +8 -1
  5. data/fbo.gemspec +14 -9
  6. data/lib/fbo.rb +7 -38
  7. data/lib/fbo/chunked_file.rb +74 -0
  8. data/lib/fbo/dump.treetop +324 -0
  9. data/lib/fbo/file.rb +17 -1
  10. data/lib/fbo/interpreter.rb +87 -0
  11. data/lib/fbo/node_extensions.rb +173 -0
  12. data/lib/fbo/parser.rb +43 -46
  13. data/lib/fbo/remote_file.rb +4 -4
  14. data/lib/fbo/version.rb +1 -1
  15. data/spec/fbo/chunked_file_spec.rb +22 -0
  16. data/spec/fbo/file_spec.rb +26 -9
  17. data/spec/fbo/interpreter_spec.rb +73 -0
  18. data/spec/fbo/parser_spec.rb +36 -38
  19. data/spec/fbo/remote_file_spec.rb +28 -24
  20. data/spec/fixtures/.keep +0 -0
  21. data/spec/fixtures/FBOFeed20130331 +52 -5529
  22. data/spec/fixtures/FBOFeed20131003 +35126 -0
  23. data/spec/spec_helper.rb +14 -16
  24. metadata +70 -108
  25. data/.rspec +0 -2
  26. data/Gemfile.lock +0 -27
  27. data/lib/fbo/notice.rb +0 -9
  28. data/lib/fbo/notices.rb +0 -6
  29. data/lib/fbo/notices/amendment.rb +0 -13
  30. data/lib/fbo/notices/archive.rb +0 -8
  31. data/lib/fbo/notices/award.rb +0 -14
  32. data/lib/fbo/notices/combined_solicitation.rb +0 -14
  33. data/lib/fbo/notices/fair_opportunity.rb +0 -15
  34. data/lib/fbo/notices/foreign_standard.rb +0 -13
  35. data/lib/fbo/notices/intent_to_bundle.rb +0 -13
  36. data/lib/fbo/notices/justification_approval.rb +0 -18
  37. data/lib/fbo/notices/modification.rb +0 -14
  38. data/lib/fbo/notices/presolicitation.rb +0 -14
  39. data/lib/fbo/notices/sale_of_surplus.rb +0 -13
  40. data/lib/fbo/notices/sources_sought.rb +0 -13
  41. data/lib/fbo/notices/special_notice.rb +0 -13
  42. data/lib/fbo/notices/unarchive.rb +0 -9
  43. data/lib/fbo/notices/unknown.rb +0 -7
  44. data/lib/fbo/parser/amendment_handler.rb +0 -58
  45. data/lib/fbo/parser/archive_handler.rb +0 -41
  46. data/lib/fbo/parser/award_handler.rb +0 -62
  47. data/lib/fbo/parser/combined_solicitation_handler.rb +0 -57
  48. data/lib/fbo/parser/fair_opportunity_handler.rb +0 -61
  49. data/lib/fbo/parser/foreign_standard_handler.rb +0 -57
  50. data/lib/fbo/parser/handler_selector.rb +0 -37
  51. data/lib/fbo/parser/intent_to_bundle_handler.rb +0 -56
  52. data/lib/fbo/parser/justification_approval_handler.rb +0 -60
  53. data/lib/fbo/parser/modification_handler.rb +0 -66
  54. data/lib/fbo/parser/notice_handler.rb +0 -27
  55. data/lib/fbo/parser/parser_helper.rb +0 -315
  56. data/lib/fbo/parser/presolicitation_handler.rb +0 -57
  57. data/lib/fbo/parser/sale_of_surplus_handler.rb +0 -57
  58. data/lib/fbo/parser/sources_sought_handler.rb +0 -57
  59. data/lib/fbo/parser/special_notice_handler.rb +0 -57
  60. data/lib/fbo/parser/unarchive_handler.rb +0 -42
  61. data/lib/fbo/parser/unknown_handler.rb +0 -20
  62. data/spec/fbo/parser/amendment_handler_spec.rb +0 -51
  63. data/spec/fbo/parser/archive_handler_spec.rb +0 -36
  64. data/spec/fbo/parser/award_handler_spec.rb +0 -56
  65. data/spec/fbo/parser/combined_solicitation_handler_spec.rb +0 -51
  66. data/spec/fbo/parser/fair_opportunity_handler_spec.rb +0 -56
  67. data/spec/fbo/parser/foreign_standard_handler_spec.rb +0 -51
  68. data/spec/fbo/parser/handler_selector_spec.rb +0 -40
  69. data/spec/fbo/parser/intent_to_bundle_handler_spec.rb +0 -52
  70. data/spec/fbo/parser/justification_approval_handler_spec.rb +0 -53
  71. data/spec/fbo/parser/modification_handler_spec.rb +0 -52
  72. data/spec/fbo/parser/presolicitation_handler_spec.rb +0 -51
  73. data/spec/fbo/parser/sale_of_surplus_handler_spec.rb +0 -53
  74. data/spec/fbo/parser/sources_sought_handler_spec.rb +0 -51
  75. data/spec/fbo/parser/special_notice_handler_spec.rb +0 -53
  76. data/spec/fbo/parser/unarchive_handler_spec.rb +0 -37
  77. data/spec/fbo/parser/unknown_handler_spec.rb +0 -24
  78. data/spec/fixtures/FBOFeed20130404 +0 -45653
  79. data/spec/fixtures/FBOFeed20130406 +0 -10152
  80. data/spec/fixtures/FBOFeed20130407 +0 -6610
  81. data/spec/fixtures/notices/amdcss +0 -26
  82. data/spec/fixtures/notices/archive +0 -10
  83. data/spec/fixtures/notices/award +0 -31
  84. data/spec/fixtures/notices/combine +0 -29
  85. data/spec/fixtures/notices/fairopp +0 -29
  86. data/spec/fixtures/notices/fstd +0 -24
  87. data/spec/fixtures/notices/itb +0 -19
  88. data/spec/fixtures/notices/ja +0 -30
  89. data/spec/fixtures/notices/mod +0 -28
  90. data/spec/fixtures/notices/notanotice +0 -7
  91. data/spec/fixtures/notices/presol +0 -25
  92. data/spec/fixtures/notices/snote +0 -26
  93. data/spec/fixtures/notices/srcsgt +0 -27
  94. data/spec/fixtures/notices/ssale +0 -24
  95. data/spec/fixtures/notices/unarchive +0 -10
data/lib/fbo/file.rb CHANGED
@@ -5,7 +5,7 @@ module FBO
5
5
  extend Forwardable
6
6
 
7
7
  attr_reader :file
8
- def_delegators :@file, :open, :readline, :read, :path, :to_path
8
+ def_delegators :@file, :readline, :read, :eof?, :gets
9
9
 
10
10
  class << self
11
11
  def filename_for_date(date)
@@ -17,5 +17,21 @@ module FBO
17
17
  def initialize(filename)
18
18
  @file = ::File.new(filename)
19
19
  end
20
+
21
# Returns the cleaned-up text of the underlying file. The file is read and
# cleaned on the first call only; later calls return the memoized result.
#
# @return [String] the value produced by cleanup_data
def contents
  @contents ||= cleanup_data(@file.read)
end
27
+
28
+ private
29
+
30
# Normalizes raw dump text before it is handed to the parser.
#
# * Invalid byte sequences are dropped by round-tripping through UTF-16LE
#   with :invalid => :replace and an empty replacement.
# * Every carriage return is removed, which turns CRLF line endings into LF
#   and strips stray CRs. The previous pattern /^M/ matched a literal capital
#   "M" at the start of a line instead, corrupting text and leaving CRs behind.
#
# @param data [String] raw text as read from the file
# @return [String] UTF-8 text without carriage returns
def cleanup_data(data)
  data.encode('UTF-16le', invalid: :replace, replace: '')
      .encode('UTF-8')
      .delete("\r")
end
20
36
  end
21
37
  end
@@ -0,0 +1,87 @@
1
module FBO
  # Walks the notice nodes of a parsed dump tree and hands each one to the
  # caller as a Hash: the node's own #to_hash merged with a :type entry taken
  # from the node's #type.
  class Interpreter
    # Maps the suffix of each generated each_<suffix> iterator to the name of
    # the node class (looked up under FBO::Dump at call time) it selects.
    NOTICE_NODE_TYPES = {
      presolicitation:            :PresolicitationNode,
      combined_solicitation:      :CombinedSolicitationNode,
      amendment:                  :AmendmentNode,
      modification:               :ModificationNode,
      award:                      :AwardNode,
      justification_and_approval: :JustificationAndApprovalNode,
      intent_to_bundle:           :IntentToBundleNode,
      fair_opportunity:           :FairOpportunityNode,
      sources_sought:             :SourcesSoughtNode,
      foreign_standard:           :ForeignStandardNode,
      special_notice:             :SpecialNoticeNode,
      sale_of_surplus:            :SaleOfSurplusNode,
      document_upload:            :DocumentUploadNode,
      document_delete:            :DocumentDeletingNode,
      document_archival:          :DocumentArchivalNode,
      document_unarchival:        :DocumentUnarchivalNode
    }.freeze

    # @param tree [#elements] parsed dump tree; its elements are the notice nodes
    def initialize(tree)
      @notice_nodes = tree.elements
    end

    # Yields every notice in the tree as a Hash.
    #
    # @yieldparam notice [Hash] node.to_hash plus a :type key
    # @return [Enumerator] when called without a block
    def each_notice(&block)
      each_node(@notice_nodes, &block)
    end

    # Defines each_presolicitation, each_award, ... each_document_unarchival.
    # Each one behaves like #each_notice restricted to nodes that are
    # instances of the mapped FBO::Dump class.
    #
    # The block is forwarded explicitly: the former `&Proc.new` idiom (a
    # block-less Proc.new capturing the caller's block) warns on Ruby 2.7 and
    # raises ArgumentError on Ruby 3.0+.
    NOTICE_NODE_TYPES.each do |suffix, node_class_name|
      define_method("each_#{suffix}") do |&block|
        each_node(nodes_by_type(FBO::Dump.const_get(node_class_name)), &block)
      end
    end

    private

    # Notice nodes that are instances of +type+ (or of a subclass).
    def nodes_by_type(type)
      @notice_nodes.select { |node| node.is_a?(type) }
    end

    # Yields the Hash form of each node, or returns an Enumerator over those
    # Hashes when no block is given.
    def each_node(nodes)
      return nodes.map { |node| notice_hash(node) }.each unless block_given?

      nodes.each { |node| yield notice_hash(node) }
    end

    # Hash form of a single notice node.
    def notice_hash(node)
      node.to_hash.merge(type: node.type)
    end
  end
end
@@ -0,0 +1,173 @@
1
module FBO
  module Dump

    # Structural node types: plain SyntaxNode subclasses with no behavior of
    # their own.
    #
    class DumpNode < Treetop::Runtime::SyntaxNode; end
    class BodyNode < Treetop::Runtime::SyntaxNode; end
    class TagContentNode < Treetop::Runtime::SyntaxNode; end

    # Notice node types, one class per kind of notice. Each subclass reports
    # a short Symbol for its kind through #type.
    #
    class NoticeNode < Treetop::Runtime::SyntaxNode
      # Builds a Hash from the notice body, which is taken to be the first
      # child element. Every element of the body contributes one entry,
      # keyed by its #to_sym and holding its #value.
      def to_hash
        body_node = elements.first
        body_node.elements.each_with_object({}) do |property, hash|
          hash[property.to_sym] = property.value
        end
      end
    end

    class PresolicitationNode < NoticeNode
      def type
        :presol
      end
    end

    class CombinedSolicitationNode < NoticeNode
      def type
        :combine
      end
    end

    class AmendmentNode < NoticeNode
      def type
        :amdcss
      end
    end

    class ModificationNode < NoticeNode
      def type
        :mod
      end
    end

    class AwardNode < NoticeNode
      def type
        :award
      end
    end

    class JustificationAndApprovalNode < NoticeNode
      def type
        :ja
      end
    end

    class IntentToBundleNode < NoticeNode
      def type
        :itb
      end
    end

    class FairOpportunityNode < NoticeNode
      def type
        :fairopp
      end
    end

    class SourcesSoughtNode < NoticeNode
      def type
        :srcsgt
      end
    end

    class ForeignStandardNode < NoticeNode
      def type
        :fstd
      end
    end

    class SpecialNoticeNode < NoticeNode
      def type
        :snote
      end
    end

    class SaleOfSurplusNode < NoticeNode
      def type
        :ssale
      end
    end

    class DocumentUploadNode < NoticeNode
      def type
        :epsupload
      end
    end

    class DocumentDeletingNode < NoticeNode
      def type
        :delete
      end
    end

    class DocumentArchivalNode < NoticeNode
      def type
        :archive
      end
    end

    class DocumentUnarchivalNode < NoticeNode
      def type
        :unarchive
      end
    end

    # Simple property nodes have a name/symbol and a value
    #
    class SimplePropertyNode < Treetop::Runtime::SyntaxNode
      # Derives the property name from the class name: the namespace and the
      # trailing "Node" are dropped, an underscore is inserted wherever a
      # non-capital is followed by a capital, and the result is lower-cased.
      # E.g. SolicitationNumberNode => :solicitation_number,
      # ProjectIDNode => :project_id, CBACNode => :cbac.
      def to_sym
        self.class.name
            .split('::').last
            .sub(/Node\z/, '')
            .gsub(/([^A-Z])([A-Z])/, '\1_\2')
            .tr('A-Z', 'a-z')
            .to_sym
      end

      # Text matched by the first child element.
      def value
        elements.first.text_value
      end

      # Single-entry Hash of name => value.
      def to_hash
        { to_sym => value }
      end
    end

    class DateNode < SimplePropertyNode; end
    class YearNode < SimplePropertyNode; end
    class AgencyNode < SimplePropertyNode; end
    class OfficeNode < SimplePropertyNode; end
    class LocationNode < SimplePropertyNode; end
    class ZipNode < SimplePropertyNode; end
    class ClassificationCodeNode < SimplePropertyNode; end
    class NaicsCodeNode < SimplePropertyNode; end
    class OfficeAddressNode < SimplePropertyNode; end
    class SubjectNode < SimplePropertyNode; end
    class SolicitationNumberNode < SimplePropertyNode; end
    class NoticeTypeNode < SimplePropertyNode; end
    class ResponseDateNode < SimplePropertyNode; end
    class ArchiveDateNode < SimplePropertyNode; end
    class ContactNode < SimplePropertyNode; end
    class DescriptionNode < SimplePropertyNode; end
    class UrlNode < SimplePropertyNode; end
    class EmailAddressNode < SimplePropertyNode; end
    class SetAsideNode < SimplePropertyNode; end
    class PopAddressNode < SimplePropertyNode; end
    class PopZipNode < SimplePropertyNode; end
    class PopCountryNode < SimplePropertyNode; end
    class AwardNumberNode < SimplePropertyNode; end
    class AwardAmountNode < SimplePropertyNode; end
    class LineNumberNode < SimplePropertyNode; end
    class AwardDateNode < SimplePropertyNode; end
    class AwardeeNode < SimplePropertyNode; end
    class AwardeeDunsNode < SimplePropertyNode; end
    class CorrectionNode < SimplePropertyNode; end
    class FileNode < SimplePropertyNode; end
    class MimeTypeNode < SimplePropertyNode; end
    class StatutoryAuthorityNode < SimplePropertyNode; end
    class ModificationNumberNode < SimplePropertyNode; end
    class DeliveryOrderNumberNode < SimplePropertyNode; end
    class JustificationAuthorityNode < SimplePropertyNode; end
    class CBACNode < SimplePropertyNode; end
    class PasswordNode < SimplePropertyNode; end
    class ProjectIDNode < SimplePropertyNode; end
    class UploadTypeNode < SimplePropertyNode; end

    # Complex properties may contain other simple properties
    #
    class ComplexPropertyNode < SimplePropertyNode
      # Merges the #to_hash of every child element into one Hash; when two
      # children share a key the later one wins.
      def value
        elements.each_with_object({}) do |property, merged|
          merged.merge!(property.to_hash)
        end
      end
    end

    class LinkNode < ComplexPropertyNode; end
    class EmailNode < ComplexPropertyNode; end
    class FileListNode < ComplexPropertyNode; end
  end
end
data/lib/fbo/parser.rb CHANGED
@@ -1,66 +1,63 @@
1
- require "strscan"
1
+ require 'treetop'
2
+ require 'fbo/node_extensions'
3
+
4
+ base_path = ::File.expand_path(::File.dirname(__FILE__))
5
+ Treetop.load(::File.join(base_path, './dump.treetop'))
2
6
 
3
7
  module FBO
4
8
  class Parser
5
- TAG_PATTERN = /<\w+>/m
6
- PATTERN = /<([A-Z]+)>.*<\/\1>/m
7
-
8
- def initialize(file, tag_pattern: TAG_PATTERN)
9
+ def initialize(file)
9
10
  @file = file
10
- @tag_pattern = tag_pattern
11
11
  end
12
12
 
13
- def notices
14
- if @notices.nil?
15
- parse_notices(split_file_contents)
13
+ def parse(data = nil)
14
+ data ||= @file.contents
15
+
16
+ puts "Class = #{ data.class.name }"
17
+ if data.respond_to? :each
18
+ @tree = parse_collection(data)
19
+ else
20
+ @tree = parse_string(data)
16
21
  end
17
- @notices
22
+ @tree
18
23
  end
19
24
 
20
25
  private
21
26
 
22
- def split_file_contents
23
- contents = @file.read
24
- contents = cleanup_contents(contents)
25
- scanner = StringScanner.new(contents)
26
- text_notices = []
27
-
28
- while !scanner.eos?
29
- initial_tag = scanner.check_until(@tag_pattern)
30
- if initial_tag
31
- initial_tag.strip!
32
- else
33
- break
34
- end
35
-
36
- text_notice = scanner.scan_until(/#{ closing_tag_for(initial_tag) }\s*/)
37
- if text_notice
38
- text_notices << text_notice.strip
39
- else
40
- break
41
- end
42
- end
43
- text_notices
27
+ def parser
28
+ @parser ||= FBO::DumpParser.new
44
29
  end
45
30
 
46
- def parse_notices(text_notices)
47
- @notices = []
48
- text_notices.each do |text|
49
- handler = FBO::Parser::HandlerSelector.select(text)
50
- @notices << handler.parse(text)
31
+ def parse_string(data)
32
+ tree = parser.parse(data)
33
+ if tree.nil?
34
+ line = parser.failure_line
35
+ column = parser.failure_column
36
+ reason = parser.failure_reason
37
+ raise Exception, "Parse error at line #{ line }, column #{ column }: #{ reason }"
51
38
  end
39
+ clean_tree(tree)
52
40
  end
53
41
 
54
- def cleanup_contents(contents)
55
- contents
56
- .encode('UTF-16le', :invalid => :replace, :replace => '')
57
- .encode('UTF-8')
58
- .gsub(/\r\n/, "\n")
59
- .gsub(/^M/, "")
42
+ def parse_collection(data)
43
+ super_tree = nil
44
+ data.each do |string|
45
+ tree = parse_string(string)
46
+ if super_tree
47
+ super_tree = FBO::Dump::DumpNode.new(super_tree.input, super_tree.interval,
48
+ super_tree.elements + tree.elements)
49
+ else
50
+ super_tree = tree
51
+ end
52
+ end
53
+ super_tree
60
54
  end
61
-
62
- def closing_tag_for(tag)
63
- tag.sub(/</, "</")
55
+
56
+ def clean_tree(node)
57
+ return if node.elements.nil?
58
+ node.elements.delete_if { |node| node.class.name == "Treetop::Runtime::SyntaxNode" }
59
+ node.elements.each { |node| clean_tree(node) }
60
+ node
64
61
  end
65
62
  end
66
63
  end
@@ -1,10 +1,10 @@
1
- require "date"
2
- require "net/ftp"
1
+ require 'date'
2
+ require 'net/ftp'
3
3
 
4
4
  module FBO
5
5
  class RemoteFile < File
6
- FTP_SERVER = "ftp.fbo.gov"
7
- TMP_DIR = "/tmp/fbo"
6
+ FTP_SERVER = 'ftp.fbo.gov'
7
+ TMP_DIR = '/tmp/fbo'
8
8
 
9
9
  class << self
10
10
  def for_date(date, options = {})
data/lib/fbo/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module FBO
2
- VERSION = "0.0.3"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
# Checks FBO::ChunkedFile#contents against the FBOFeed20131003 fixture using
# a 50KB chunk size.
describe FBO::ChunkedFile do
  let(:filename) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'FBOFeed20131003') }
  let(:file) { FBO::File.new(filename) }
  let(:chunk_size) { 50 * 1024 } # 50KB
  subject { FBO::ChunkedFile.new(file, chunk_size) }

  describe "#contents" do
    it "returns an Array of String" do
      subject.contents.must_be_instance_of Array
      subject.contents.each do |chunk|
        chunk.must_be_instance_of String
      end
    end

    # Every chunk must match the notice-closing pattern.
    it "returns chunks that include whole notices" do
      subject.contents.each { |chunk| chunk.must_match FBO::ChunkedFile::NOTICE_CLOSE_REGEXP }
    end
  end
end