fbo 0.0.3 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -5
  3. data/Gemfile +0 -2
  4. data/Rakefile +8 -1
  5. data/fbo.gemspec +14 -9
  6. data/lib/fbo.rb +7 -38
  7. data/lib/fbo/chunked_file.rb +74 -0
  8. data/lib/fbo/dump.treetop +324 -0
  9. data/lib/fbo/file.rb +17 -1
  10. data/lib/fbo/interpreter.rb +87 -0
  11. data/lib/fbo/node_extensions.rb +173 -0
  12. data/lib/fbo/parser.rb +43 -46
  13. data/lib/fbo/remote_file.rb +4 -4
  14. data/lib/fbo/version.rb +1 -1
  15. data/spec/fbo/chunked_file_spec.rb +22 -0
  16. data/spec/fbo/file_spec.rb +26 -9
  17. data/spec/fbo/interpreter_spec.rb +73 -0
  18. data/spec/fbo/parser_spec.rb +36 -38
  19. data/spec/fbo/remote_file_spec.rb +28 -24
  20. data/spec/fixtures/.keep +0 -0
  21. data/spec/fixtures/FBOFeed20130331 +52 -5529
  22. data/spec/fixtures/FBOFeed20131003 +35126 -0
  23. data/spec/spec_helper.rb +14 -16
  24. metadata +70 -108
  25. data/.rspec +0 -2
  26. data/Gemfile.lock +0 -27
  27. data/lib/fbo/notice.rb +0 -9
  28. data/lib/fbo/notices.rb +0 -6
  29. data/lib/fbo/notices/amendment.rb +0 -13
  30. data/lib/fbo/notices/archive.rb +0 -8
  31. data/lib/fbo/notices/award.rb +0 -14
  32. data/lib/fbo/notices/combined_solicitation.rb +0 -14
  33. data/lib/fbo/notices/fair_opportunity.rb +0 -15
  34. data/lib/fbo/notices/foreign_standard.rb +0 -13
  35. data/lib/fbo/notices/intent_to_bundle.rb +0 -13
  36. data/lib/fbo/notices/justification_approval.rb +0 -18
  37. data/lib/fbo/notices/modification.rb +0 -14
  38. data/lib/fbo/notices/presolicitation.rb +0 -14
  39. data/lib/fbo/notices/sale_of_surplus.rb +0 -13
  40. data/lib/fbo/notices/sources_sought.rb +0 -13
  41. data/lib/fbo/notices/special_notice.rb +0 -13
  42. data/lib/fbo/notices/unarchive.rb +0 -9
  43. data/lib/fbo/notices/unknown.rb +0 -7
  44. data/lib/fbo/parser/amendment_handler.rb +0 -58
  45. data/lib/fbo/parser/archive_handler.rb +0 -41
  46. data/lib/fbo/parser/award_handler.rb +0 -62
  47. data/lib/fbo/parser/combined_solicitation_handler.rb +0 -57
  48. data/lib/fbo/parser/fair_opportunity_handler.rb +0 -61
  49. data/lib/fbo/parser/foreign_standard_handler.rb +0 -57
  50. data/lib/fbo/parser/handler_selector.rb +0 -37
  51. data/lib/fbo/parser/intent_to_bundle_handler.rb +0 -56
  52. data/lib/fbo/parser/justification_approval_handler.rb +0 -60
  53. data/lib/fbo/parser/modification_handler.rb +0 -66
  54. data/lib/fbo/parser/notice_handler.rb +0 -27
  55. data/lib/fbo/parser/parser_helper.rb +0 -315
  56. data/lib/fbo/parser/presolicitation_handler.rb +0 -57
  57. data/lib/fbo/parser/sale_of_surplus_handler.rb +0 -57
  58. data/lib/fbo/parser/sources_sought_handler.rb +0 -57
  59. data/lib/fbo/parser/special_notice_handler.rb +0 -57
  60. data/lib/fbo/parser/unarchive_handler.rb +0 -42
  61. data/lib/fbo/parser/unknown_handler.rb +0 -20
  62. data/spec/fbo/parser/amendment_handler_spec.rb +0 -51
  63. data/spec/fbo/parser/archive_handler_spec.rb +0 -36
  64. data/spec/fbo/parser/award_handler_spec.rb +0 -56
  65. data/spec/fbo/parser/combined_solicitation_handler_spec.rb +0 -51
  66. data/spec/fbo/parser/fair_opportunity_handler_spec.rb +0 -56
  67. data/spec/fbo/parser/foreign_standard_handler_spec.rb +0 -51
  68. data/spec/fbo/parser/handler_selector_spec.rb +0 -40
  69. data/spec/fbo/parser/intent_to_bundle_handler_spec.rb +0 -52
  70. data/spec/fbo/parser/justification_approval_handler_spec.rb +0 -53
  71. data/spec/fbo/parser/modification_handler_spec.rb +0 -52
  72. data/spec/fbo/parser/presolicitation_handler_spec.rb +0 -51
  73. data/spec/fbo/parser/sale_of_surplus_handler_spec.rb +0 -53
  74. data/spec/fbo/parser/sources_sought_handler_spec.rb +0 -51
  75. data/spec/fbo/parser/special_notice_handler_spec.rb +0 -53
  76. data/spec/fbo/parser/unarchive_handler_spec.rb +0 -37
  77. data/spec/fbo/parser/unknown_handler_spec.rb +0 -24
  78. data/spec/fixtures/FBOFeed20130404 +0 -45653
  79. data/spec/fixtures/FBOFeed20130406 +0 -10152
  80. data/spec/fixtures/FBOFeed20130407 +0 -6610
  81. data/spec/fixtures/notices/amdcss +0 -26
  82. data/spec/fixtures/notices/archive +0 -10
  83. data/spec/fixtures/notices/award +0 -31
  84. data/spec/fixtures/notices/combine +0 -29
  85. data/spec/fixtures/notices/fairopp +0 -29
  86. data/spec/fixtures/notices/fstd +0 -24
  87. data/spec/fixtures/notices/itb +0 -19
  88. data/spec/fixtures/notices/ja +0 -30
  89. data/spec/fixtures/notices/mod +0 -28
  90. data/spec/fixtures/notices/notanotice +0 -7
  91. data/spec/fixtures/notices/presol +0 -25
  92. data/spec/fixtures/notices/snote +0 -26
  93. data/spec/fixtures/notices/srcsgt +0 -27
  94. data/spec/fixtures/notices/ssale +0 -24
  95. data/spec/fixtures/notices/unarchive +0 -10
data/lib/fbo/file.rb CHANGED
@@ -5,7 +5,7 @@ module FBO
5
5
  extend Forwardable
6
6
 
7
7
  attr_reader :file
8
- def_delegators :@file, :open, :readline, :read, :path, :to_path
8
+ def_delegators :@file, :readline, :read, :eof?, :gets
9
9
 
10
10
  class << self
11
11
  def filename_for_date(date)
@@ -17,5 +17,21 @@ module FBO
17
17
  def initialize(filename)
18
18
  @file = ::File.new(filename)
19
19
  end
20
+
21
# Returns the cleaned-up text of the underlying file.
#
# The file is read and passed through cleanup_data on the first call only;
# the result is cached in @contents and returned on every later call.
#
# @return [String] the cleaned file contents
def contents
  @contents ||= cleanup_data(@file.read)
end
27
+
28
+ private
29
+
30
# Normalizes raw feed text.
#
# Round-trips the data through UTF-16LE so that invalid byte sequences are
# dropped, converts it back to UTF-8, and removes every carriage return so
# that CRLF line endings become LF and stray CRs disappear.
#
# The previous implementation used /^M/, which matches a literal "M" at the
# start of a line rather than the control character editors display as ^M;
# it deleted real text and left lone carriage returns in place.
#
# @param data [String] raw file contents
# @return [String] UTF-8 text without invalid bytes or carriage returns
def cleanup_data(data)
  data.encode('UTF-16le', :invalid => :replace, :replace => '')
      .encode('UTF-8')
      .delete("\r")
end
20
36
  end
21
37
  end
@@ -0,0 +1,87 @@
1
module FBO
  # Iterates over the notice nodes of a parsed dump tree.
  #
  # Every each_* method yields one Hash per matching node: the node's
  # to_hash result with a :type entry (the node's type) merged in.
  #
  # The methods take an explicit block parameter. Calling Proc.new without a
  # block to capture the caller's block is not supported on Ruby 3.0 and
  # later, where it raises ArgumentError.
  class Interpreter
    # @param tree [#elements] parsed tree whose elements are the notice nodes;
    #   a nil element list is treated as empty
    def initialize(tree)
      @notice_nodes = tree.elements || []
    end

    # Yields every notice in the tree, regardless of type.
    #
    # @yieldparam notice [Hash] the notice's properties plus :type
    def each_notice(&block)
      each_node(@notice_nodes, &block)
    end

    # Yields only the FBO::Dump::PresolicitationNode notices.
    def each_presolicitation(&block)
      each_node(nodes_by_type(FBO::Dump::PresolicitationNode), &block)
    end

    # Yields only the FBO::Dump::CombinedSolicitationNode notices.
    def each_combined_solicitation(&block)
      each_node(nodes_by_type(FBO::Dump::CombinedSolicitationNode), &block)
    end

    # Yields only the FBO::Dump::AmendmentNode notices.
    def each_amendment(&block)
      each_node(nodes_by_type(FBO::Dump::AmendmentNode), &block)
    end

    # Yields only the FBO::Dump::ModificationNode notices.
    def each_modification(&block)
      each_node(nodes_by_type(FBO::Dump::ModificationNode), &block)
    end

    # Yields only the FBO::Dump::AwardNode notices.
    def each_award(&block)
      each_node(nodes_by_type(FBO::Dump::AwardNode), &block)
    end

    # Yields only the FBO::Dump::JustificationAndApprovalNode notices.
    def each_justification_and_approval(&block)
      each_node(nodes_by_type(FBO::Dump::JustificationAndApprovalNode), &block)
    end

    # Yields only the FBO::Dump::IntentToBundleNode notices.
    def each_intent_to_bundle(&block)
      each_node(nodes_by_type(FBO::Dump::IntentToBundleNode), &block)
    end

    # Yields only the FBO::Dump::FairOpportunityNode notices.
    def each_fair_opportunity(&block)
      each_node(nodes_by_type(FBO::Dump::FairOpportunityNode), &block)
    end

    # Yields only the FBO::Dump::SourcesSoughtNode notices.
    def each_sources_sought(&block)
      each_node(nodes_by_type(FBO::Dump::SourcesSoughtNode), &block)
    end

    # Yields only the FBO::Dump::ForeignStandardNode notices.
    def each_foreign_standard(&block)
      each_node(nodes_by_type(FBO::Dump::ForeignStandardNode), &block)
    end

    # Yields only the FBO::Dump::SpecialNoticeNode notices.
    def each_special_notice(&block)
      each_node(nodes_by_type(FBO::Dump::SpecialNoticeNode), &block)
    end

    # Yields only the FBO::Dump::SaleOfSurplusNode notices.
    def each_sale_of_surplus(&block)
      each_node(nodes_by_type(FBO::Dump::SaleOfSurplusNode), &block)
    end

    # Yields only the FBO::Dump::DocumentUploadNode notices.
    def each_document_upload(&block)
      each_node(nodes_by_type(FBO::Dump::DocumentUploadNode), &block)
    end

    # Yields only the FBO::Dump::DocumentDeletingNode notices.
    def each_document_delete(&block)
      each_node(nodes_by_type(FBO::Dump::DocumentDeletingNode), &block)
    end

    # Yields only the FBO::Dump::DocumentArchivalNode notices.
    def each_document_archival(&block)
      each_node(nodes_by_type(FBO::Dump::DocumentArchivalNode), &block)
    end

    # Yields only the FBO::Dump::DocumentUnarchivalNode notices.
    def each_document_unarchival(&block)
      each_node(nodes_by_type(FBO::Dump::DocumentUnarchivalNode), &block)
    end

    private

    # Selects the notice nodes that are instances of +type+ (or a subclass).
    def nodes_by_type(type)
      @notice_nodes.select { |node| node.is_a?(type) }
    end

    # Yields each node as a Hash of its properties with :type merged in.
    def each_node(nodes)
      nodes.each do |node|
        yield node.to_hash.merge(type: node.type)
      end
    end
  end
end
@@ -0,0 +1,173 @@
1
module FBO
  module Dump

    # Structural node types
    #
    class DumpNode < Treetop::Runtime::SyntaxNode; end
    class BodyNode < Treetop::Runtime::SyntaxNode; end
    class TagContentNode < Treetop::Runtime::SyntaxNode; end

    # Notice node types: one class per kind of notice. Each subclass reports
    # its kind as a short symbol through #type.
    #
    class NoticeNode < Treetop::Runtime::SyntaxNode
      # Builds a Hash from the child elements of this node's first element,
      # keyed by each child's to_sym and holding each child's value.
      def to_hash
        body_node = elements.first
        body_node.elements.each_with_object({}) do |element, hash|
          hash[element.to_sym] = element.value
        end
      end
    end
    class PresolicitationNode < NoticeNode
      def type
        :presol
      end
    end
    class CombinedSolicitationNode < NoticeNode
      def type
        :combine
      end
    end
    class AmendmentNode < NoticeNode
      def type
        :amdcss
      end
    end
    class ModificationNode < NoticeNode
      def type
        :mod
      end
    end
    class AwardNode < NoticeNode
      def type
        :award
      end
    end
    class JustificationAndApprovalNode < NoticeNode
      def type
        :ja
      end
    end
    class IntentToBundleNode < NoticeNode
      def type
        :itb
      end
    end
    class FairOpportunityNode < NoticeNode
      def type
        :fairopp
      end
    end
    class SourcesSoughtNode < NoticeNode
      def type
        :srcsgt
      end
    end
    class ForeignStandardNode < NoticeNode
      def type
        :fstd
      end
    end
    class SpecialNoticeNode < NoticeNode
      def type
        :snote
      end
    end
    class SaleOfSurplusNode < NoticeNode
      def type
        :ssale
      end
    end
    class DocumentUploadNode < NoticeNode
      def type
        :epsupload
      end
    end
    class DocumentDeletingNode < NoticeNode
      def type
        :delete
      end
    end
    class DocumentArchivalNode < NoticeNode
      def type
        :archive
      end
    end
    class DocumentUnarchivalNode < NoticeNode
      def type
        :unarchive
      end
    end

    # Simple property nodes have a name/symbol and a value
    #
    class SimplePropertyNode < Treetop::Runtime::SyntaxNode
      # Derives the property name from the class name: the namespace and the
      # trailing "Node" are dropped and the rest is converted to snake_case,
      # e.g. AwardeeDunsNode => :awardee_duns, ProjectIDNode => :project_id.
      def to_sym
        self.class.name.split('::').last
            .sub(/Node$/, '')
            .gsub(/([^A-Z])([A-Z])/, '\1_\2')
            .tr('A-Z', 'a-z')
            .to_sym
      end

      # The text of this node's first child element.
      def value
        elements[0].text_value
      end

      # Single-entry Hash mapping the property name to its value.
      def to_hash
        { to_sym => value }
      end
    end
    class DateNode < SimplePropertyNode; end
    class YearNode < SimplePropertyNode; end
    class AgencyNode < SimplePropertyNode; end
    class OfficeNode < SimplePropertyNode; end
    class LocationNode < SimplePropertyNode; end
    class ZipNode < SimplePropertyNode; end
    class ClassificationCodeNode < SimplePropertyNode; end
    class NaicsCodeNode < SimplePropertyNode; end
    class OfficeAddressNode < SimplePropertyNode; end
    class SubjectNode < SimplePropertyNode; end
    class SolicitationNumberNode < SimplePropertyNode; end
    class NoticeTypeNode < SimplePropertyNode; end
    class ResponseDateNode < SimplePropertyNode; end
    class ArchiveDateNode < SimplePropertyNode; end
    class ContactNode < SimplePropertyNode; end
    class DescriptionNode < SimplePropertyNode; end
    class UrlNode < SimplePropertyNode; end
    class EmailAddressNode < SimplePropertyNode; end
    class SetAsideNode < SimplePropertyNode; end
    class PopAddressNode < SimplePropertyNode; end
    class PopZipNode < SimplePropertyNode; end
    class PopCountryNode < SimplePropertyNode; end
    class AwardNumberNode < SimplePropertyNode; end
    class AwardAmountNode < SimplePropertyNode; end
    class LineNumberNode < SimplePropertyNode; end
    class AwardDateNode < SimplePropertyNode; end
    class AwardeeNode < SimplePropertyNode; end
    class AwardeeDunsNode < SimplePropertyNode; end
    class CorrectionNode < SimplePropertyNode; end
    class FileNode < SimplePropertyNode; end
    class MimeTypeNode < SimplePropertyNode; end
    class StatutoryAuthorityNode < SimplePropertyNode; end
    class ModificationNumberNode < SimplePropertyNode; end
    class DeliveryOrderNumberNode < SimplePropertyNode; end
    class JustificationAuthorityNode < SimplePropertyNode; end
    class CBACNode < SimplePropertyNode; end
    class PasswordNode < SimplePropertyNode; end
    class ProjectIDNode < SimplePropertyNode; end
    class UploadTypeNode < SimplePropertyNode; end

    # Complex properties may contain other simple properties
    #
    class ComplexPropertyNode < SimplePropertyNode
      # Merges the to_hash of every child element into one Hash; when two
      # children share a key, the later child wins.
      def value
        elements.each_with_object({}) do |element, merged|
          merged.merge!(element.to_hash)
        end
      end
    end
    class LinkNode < ComplexPropertyNode; end
    class EmailNode < ComplexPropertyNode; end
    class FileListNode < ComplexPropertyNode; end
  end
end
data/lib/fbo/parser.rb CHANGED
@@ -1,66 +1,63 @@
1
- require "strscan"
1
+ require 'treetop'
2
+ require 'fbo/node_extensions'
3
+
4
+ base_path = ::File.expand_path(::File.dirname(__FILE__))
5
+ Treetop.load(::File.join(base_path, './dump.treetop'))
2
6
 
3
7
  module FBO
4
8
  class Parser
5
- TAG_PATTERN = /<\w+>/m
6
- PATTERN = /<([A-Z]+)>.*<\/\1>/m
7
-
8
- def initialize(file, tag_pattern: TAG_PATTERN)
9
+ def initialize(file)
9
10
  @file = file
10
- @tag_pattern = tag_pattern
11
11
  end
12
12
 
13
- def notices
14
- if @notices.nil?
15
- parse_notices(split_file_contents)
13
+ def parse(data = nil)
14
+ data ||= @file.contents
15
+
16
+ puts "Class = #{ data.class.name }"
17
+ if data.respond_to? :each
18
+ @tree = parse_collection(data)
19
+ else
20
+ @tree = parse_string(data)
16
21
  end
17
- @notices
22
+ @tree
18
23
  end
19
24
 
20
25
  private
21
26
 
22
- def split_file_contents
23
- contents = @file.read
24
- contents = cleanup_contents(contents)
25
- scanner = StringScanner.new(contents)
26
- text_notices = []
27
-
28
- while !scanner.eos?
29
- initial_tag = scanner.check_until(@tag_pattern)
30
- if initial_tag
31
- initial_tag.strip!
32
- else
33
- break
34
- end
35
-
36
- text_notice = scanner.scan_until(/#{ closing_tag_for(initial_tag) }\s*/)
37
- if text_notice
38
- text_notices << text_notice.strip
39
- else
40
- break
41
- end
42
- end
43
- text_notices
27
# Lazily builds the FBO::DumpParser and reuses the same instance afterwards.
def parser
  return @parser if @parser

  @parser = FBO::DumpParser.new
end
45
30
 
46
- def parse_notices(text_notices)
47
- @notices = []
48
- text_notices.each do |text|
49
- handler = FBO::Parser::HandlerSelector.select(text)
50
- @notices << handler.parse(text)
31
# Parses a single string and returns the cleaned parse tree.
#
# @param data [String] dump text to parse
# @return the tree produced by the parser after clean_tree has run on it
# @raise [RuntimeError] when the parser returns nil; the message carries the
#   parser's failure line, column and reason. A StandardError descendant is
#   raised instead of a bare Exception so that an ordinary `rescue` can
#   handle it; code that rescues Exception still catches it.
def parse_string(data)
  tree = parser.parse(data)
  return clean_tree(tree) unless tree.nil?

  line = parser.failure_line
  column = parser.failure_column
  reason = parser.failure_reason
  raise "Parse error at line #{ line }, column #{ column }: #{ reason }"
end
53
41
 
54
- def cleanup_contents(contents)
55
- contents
56
- .encode('UTF-16le', :invalid => :replace, :replace => '')
57
- .encode('UTF-8')
58
- .gsub(/\r\n/, "\n")
59
- .gsub(/^M/, "")
42
# Parses each string yielded by +data+ and folds the results into one tree.
#
# The first parsed tree is used as-is. Each later tree is merged in by
# building a new FBO::Dump::DumpNode that keeps the accumulated tree's input
# and interval and appends the new tree's elements. Returns nil when +data+
# yields nothing.
def parse_collection(data)
  combined = nil
  data.each do |string|
    parsed = parse_string(string)
    combined =
      if combined
        FBO::Dump::DumpNode.new(combined.input, combined.interval,
                                combined.elements + parsed.elements)
      else
        parsed
      end
  end
  combined
end
61
-
62
- def closing_tag_for(tag)
63
- tag.sub(/</, "</")
55
+
56
# Recursively removes plain Treetop::Runtime::SyntaxNode instances from the
# tree, keeping only nodes of more specific classes.
#
# The element lists are modified in place. A node without an element list
# is returned unchanged (previously nil was returned for such a node, which
# made the caller lose the tree). Block parameters no longer shadow the
# method argument.
#
# @param node [#elements] root of the (sub)tree to clean
# @return the same node that was passed in
def clean_tree(node)
  children = node.elements
  return node if children.nil?

  children.delete_if { |child| child.class.name == "Treetop::Runtime::SyntaxNode" }
  children.each { |child| clean_tree(child) }
  node
end
65
62
  end
66
63
  end
@@ -1,10 +1,10 @@
1
- require "date"
2
- require "net/ftp"
1
+ require 'date'
2
+ require 'net/ftp'
3
3
 
4
4
  module FBO
5
5
  class RemoteFile < File
6
- FTP_SERVER = "ftp.fbo.gov"
7
- TMP_DIR = "/tmp/fbo"
6
+ FTP_SERVER = 'ftp.fbo.gov'
7
+ TMP_DIR = '/tmp/fbo'
8
8
 
9
9
  class << self
10
10
  def for_date(date, options = {})
data/lib/fbo/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module FBO
2
- VERSION = "0.0.3"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
# Exercises FBO::ChunkedFile#contents against the FBOFeed20131003 fixture,
# using 50KB chunks.
describe FBO::ChunkedFile do
  let(:filename) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'FBOFeed20131003') }
  let(:file) { FBO::File.new(filename) }
  let(:chunk_size) { 50 * 1024 } # 50KB
  subject { FBO::ChunkedFile.new(file, chunk_size) }

  describe "#contents" do
    it "returns an Array of String" do
      subject.contents.must_be_instance_of Array
      subject.contents.each do |chunk|
        chunk.must_be_instance_of String
      end
    end

    # Every chunk must match the notice-closing pattern.
    it "returns chunks that include whole notices" do
      subject.contents.each do |chunk|
        chunk.must_match FBO::ChunkedFile::NOTICE_CLOSE_REGEXP
      end
    end
  end
end