udise_school_report_reader 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +20 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +45 -0
  5. data/lib/udise_school_report_reader/activities_data_reader.rb +58 -0
  6. data/lib/udise_school_report_reader/anganwadi_data_reader.rb +22 -0
  7. data/lib/udise_school_report_reader/basic_info_data_reader.rb +29 -0
  8. data/lib/udise_school_report_reader/block_rectangle_combiner.rb +115 -0
  9. data/lib/udise_school_report_reader/building_data_reader.rb +36 -0
  10. data/lib/udise_school_report_reader/characteristics_reader.rb +28 -0
  11. data/lib/udise_school_report_reader/csv_writer.rb +75 -0
  12. data/lib/udise_school_report_reader/data_reader_base.rb +86 -0
  13. data/lib/udise_school_report_reader/digital_facilities_data_reader.rb +42 -0
  14. data/lib/udise_school_report_reader/enrollment_data_reader.rb +136 -0
  15. data/lib/udise_school_report_reader/enrollment_html_writer.rb +81 -0
  16. data/lib/udise_school_report_reader/enrollment_yaml_writer.rb +62 -0
  17. data/lib/udise_school_report_reader/ews_data_reader.rb +118 -0
  18. data/lib/udise_school_report_reader/ews_html_writer.rb +63 -0
  19. data/lib/udise_school_report_reader/ews_yaml_writer.rb +31 -0
  20. data/lib/udise_school_report_reader/location_data_reader.rb +47 -0
  21. data/lib/udise_school_report_reader/official_data_reader.rb +40 -0
  22. data/lib/udise_school_report_reader/pdf_block_extractor.rb +49 -0
  23. data/lib/udise_school_report_reader/pdf_content_compressor.rb +36 -0
  24. data/lib/udise_school_report_reader/pdf_rectangle_extractor.rb +53 -0
  25. data/lib/udise_school_report_reader/rooms_data_reader.rb +36 -0
  26. data/lib/udise_school_report_reader/rte_data_reader.rb +118 -0
  27. data/lib/udise_school_report_reader/rte_html_writer.rb +63 -0
  28. data/lib/udise_school_report_reader/rte_yaml_writer.rb +61 -0
  29. data/lib/udise_school_report_reader/sanitation_data_reader.rb +56 -0
  30. data/lib/udise_school_report_reader/school_report_parser.rb +295 -0
  31. data/lib/udise_school_report_reader/teacher_data_reader.rb +204 -0
  32. data/lib/udise_school_report_reader/version.rb +3 -0
  33. data/lib/udise_school_report_reader.rb +41 -0
  34. data/test/school_report_parser_test.rb +62 -0
  35. metadata +165 -0
@@ -0,0 +1,204 @@
1
+ require_relative 'data_reader_base'
2
+
3
# Reads the teacher-related section of a school report (given as an array of
# text lines) into a nested hash rooted at the 'teachers' key.
#
# Label/value fields are described declaratively in FIELD_MAPPINGS and are
# resolved by calling `super` from .read (presumably implemented by
# DataReaderBase, which is not visible from this file — confirm there).
# Fields whose value sits on the line *after* the label are handled by a
# line scan in .read itself.
class TeacherDataReader
  include DataReaderBase

  # Report label => description of where and how to store the value.
  #   key_path:        path of string keys inside the result hash
  #   value_type:      :integer or :string
  #   extract_pattern: optional regexp applied to the raw value
  # The remaining options ('Subject:' entry) are interpreted by the base
  # reader — NOTE(review): semantics assumed from their names; verify against
  # DataReaderBase.
  FIELD_MAPPINGS = {
    'Regular' => {
      key_path: ['teachers', 'count_by_level', 'regular'],
      value_type: :integer
    },
    'Part-time' => {
      key_path: ['teachers', 'count_by_level', 'part_time'],
      value_type: :integer
    },
    'Contract' => {
      key_path: ['teachers', 'count_by_level', 'contract'],
      value_type: :integer
    },
    'Male' => {
      key_path: ['teachers', 'demographics', 'male'],
      value_type: :integer
    },
    'Female' => {
      key_path: ['teachers', 'demographics', 'female'],
      value_type: :integer
    },
    'Transgender' => {
      key_path: ['teachers', 'demographics', 'transgender'],
      value_type: :integer
    },
    'Below Graduate' => {
      key_path: ['teachers', 'qualifications', 'academic', 'below_graduate'],
      value_type: :integer
    },
    'Graduate' => {
      key_path: ['teachers', 'qualifications', 'academic', 'graduate'],
      value_type: :integer
    },
    'Post Graduate and Above' => {
      key_path: ['teachers', 'qualifications', 'academic', 'post_graduate_and_above'],
      value_type: :integer
    },
    'B.Ed. or Equivalent' => {
      key_path: ['teachers', 'qualifications', 'professional', 'bed'],
      value_type: :integer
    },
    'M.Ed. or Equivalent' => {
      key_path: ['teachers', 'qualifications', 'professional', 'med'],
      value_type: :integer
    },
    'Diploma or Certificate in basic teachers training' => {
      key_path: ['teachers', 'qualifications', 'professional', 'basic_training'],
      value_type: :integer
    },
    'Bachelor of Elementary Education (B.El.Ed.)' => {
      key_path: ['teachers', 'qualifications', 'professional', 'beled'],
      value_type: :integer
    },
    'Diploma/degree in special Education' => {
      key_path: ['teachers', 'qualifications', 'professional', 'special_education'],
      value_type: :integer
    },
    'Teachers Aged above 55' => {
      key_path: ['teachers', 'age_distribution', 'above_55'],
      value_type: :integer
    },
    'Total Teacher Trained in Computer' => {
      key_path: ['teachers', 'training', 'computer_trained'],
      value_type: :integer
    },
    'No. of Total Teacher Received Service Training' => {
      key_path: ['teachers', 'training', 'service', 'total'],
      value_type: :integer
    },
    'Special Training Received' => {
      key_path: ['teachers', 'training', 'special', 'received'],
      value_type: :string
    },
    'Teaching Hours per Week' => {
      key_path: ['teachers', 'workload', 'teaching_hours', 'per_week'],
      value_type: :integer,
      extract_pattern: /(\d+)/
    },
    'Non-Teaching Hours' => {
      key_path: ['teachers', 'workload', 'non_teaching_hours', 'per_week'],
      value_type: :integer,
      extract_pattern: /(\d+)/
    },
    'Total Teacher Involve in Non Teaching Assignment' => {
      key_path: ['teachers', 'assignments', 'non_teaching'],
      value_type: :integer,
      extract_pattern: /^(\d+)$/
    },
    'Subject:' => {
      key_path: ['teachers', 'workload', 'by_subject'],
      value_type: :integer,
      extract_pattern: /^Subject:\s*(.+?)(?:\s*,\s*Teachers:\s*(\d+))?$/,
      dynamic_key: true,
      key_from_match: 1,
      value_from_match: 2,
      key_transform: :downcase
    }
  }.freeze

  # Category text of a "<number>-<category>" line => key under
  # teachers.classes_taught. Unlisted categories are slugified instead.
  CLASS_LEVEL_KEYS = {
    'Primary' => 'primary',
    'Up.Pr.' => 'upper_primary',
    'Pr. & Up.Pr.' => 'primary_and_upper_primary',
    'Sec. only' => 'secondary_only',
    'H Sec only.' => 'higher_secondary_only',
    'Up pri and Sec.' => 'upper_primary_and_secondary',
    'Sec and H Sec' => 'secondary_and_higher_secondary',
    'Pre-Primary Only.' => 'pre_primary_only',
    'Pre- Pri & Pri' => 'pre_primary_and_primary'
  }.freeze

  # Exact label line => key under teachers.qualifications.professional for
  # counts that appear on the line following the label.
  PROFESSIONAL_COUNT_KEYS = {
    'Other' => 'other',
    'None' => 'none',
    'Pursuing any Relevant Professional Course' => 'pursuing_course'
  }.freeze

  # A (stripped) line consisting solely of digits.
  COUNT_PATTERN = /^\d+$/.freeze

  # Top-level teacher sections copied wholesale from the base reader result.
  WHOLE_SECTIONS = %w[count_by_level demographics age_distribution].freeze

  # Builds the teacher data hash from the report lines.
  #
  # @param lines [Array<String>] text lines of the report
  # @return [Hash] { 'teachers' => {...} } with nil values and empty
  #   sub-hashes removed (the 'assignments' section is always kept)
  # @raise [Errno::ENOENT] if template.yml cannot be found
  def self.read(lines)
    require 'yaml'
    # NOTE(review): the path is resolved against the process working
    # directory, and template.yml is not listed among the gem's packaged
    # files — confirm callers always run from a directory containing it.
    template = YAML.load_file('template.yml')
    # Tolerate a template without a 'teachers' section instead of failing
    # later with NoMethodError on nil.
    teachers = (template.is_a?(Hash) && template['teachers']) || {}
    data = { 'teachers' => teachers }

    # Process base module mappings first (zsuper forwards `lines`).
    base_data = super
    base_teachers = base_data&.dig('teachers')
    merge_teacher_base_data(teachers, base_teachers) if base_teachers

    lines.each_with_index do |line, i|
      # Every hand-parsed field stores the number found on the next line.
      count = lines[i + 1]&.strip
      next unless count&.match?(COUNT_PATTERN)

      case line
      when /^(\d+)-(.+)$/
        key = teacher_class_level_key(Regexp.last_match(2).strip)
        (teachers['classes_taught'] ||= {})[key] = count.to_i
      else
        key = PROFESSIONAL_COUNT_KEYS[line]
        next unless key

        qualifications = (teachers['qualifications'] ||= {})
        (qualifications['professional'] ||= {})[key] = count.to_i
      end
    end

    prune_empty_teacher_sections(teachers)
    data
  end

  # Copies the values produced by the base reader over the template values,
  # creating intermediate hashes where the template lacks them.
  def self.merge_teacher_base_data(teachers, base)
    WHOLE_SECTIONS.each do |section|
      teachers[section] = base[section] if base[section]
    end

    # 'assignments' is always present once base data exists.
    teachers['assignments'] ||= {}
    non_teaching = base['assignments']&.dig('non_teaching')
    teachers['assignments']['non_teaching'] = non_teaching if non_teaching

    if (qualifications = base['qualifications'])
      target = (teachers['qualifications'] ||= {})
      %w[academic professional].each do |kind|
        target[kind] = qualifications[kind] if qualifications[kind]
      end
    end

    if (training = base['training'])
      target = (teachers['training'] ||= {})
      target['computer_trained'] = training['computer_trained'] if training['computer_trained']
      target['service'] ||= {}
      service_total = training['service']&.dig('total')
      target['service']['total'] = service_total if service_total
      target['special'] ||= {}
      special_received = training['special']&.dig('received')
      target['special']['received'] = special_received if special_received
    end

    if (workload = base['workload'])
      target = (teachers['workload'] ||= {})
      %w[teaching_hours non_teaching_hours by_subject].each do |part|
        target[part] = workload[part] if workload[part]
      end
    end
  end
  private_class_method :merge_teacher_base_data

  # Maps a class-level category label to its hash key, falling back to a
  # lower-case, underscore-separated slug for unknown labels.
  def self.teacher_class_level_key(category)
    CLASS_LEVEL_KEYS.fetch(category) do
      category.downcase.gsub(/[^a-z0-9]+/, '_').gsub(/^_|_$/, '')
    end
  end
  private_class_method :teacher_class_level_key

  # Drops nil values and empty hashes, one level inside each section and
  # then at the section level; 'assignments' is deliberately left untouched.
  def self.prune_empty_teacher_sections(teachers)
    teachers.each do |key, section|
      next unless section.is_a?(Hash)
      next if key == 'assignments'

      section.reject! { |_, v| v.nil? || (v.is_a?(Hash) && v.empty?) }
    end
    teachers.reject! { |k, v| v.nil? || (v.is_a?(Hash) && v.empty? && k != 'assignments') }
  end
  private_class_method :prune_empty_teacher_sections
end
@@ -0,0 +1,3 @@
1
module UdiseSchoolReportReader
  # Version string of the gem; frozen so the constant cannot be mutated
  # in place by callers.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,41 @@
1
+ require "udise_school_report_reader/version"
2
+
3
+ # Core functionality
4
+ require_relative "udise_school_report_reader/pdf_block_extractor"
5
+ require_relative "udise_school_report_reader/pdf_rectangle_extractor"
6
+ require_relative "udise_school_report_reader/pdf_content_compressor"
7
+ require_relative "udise_school_report_reader/block_rectangle_combiner"
8
+
9
+ # Data readers
10
+ require_relative "udise_school_report_reader/data_reader_base"
11
+ require_relative "udise_school_report_reader/activities_data_reader"
12
+ require_relative "udise_school_report_reader/anganwadi_data_reader"
13
+ require_relative "udise_school_report_reader/basic_info_data_reader"
14
+ require_relative "udise_school_report_reader/building_data_reader"
15
+ require_relative "udise_school_report_reader/characteristics_reader"
16
+ require_relative "udise_school_report_reader/digital_facilities_data_reader"
17
+ require_relative "udise_school_report_reader/enrollment_data_reader"
18
+ require_relative "udise_school_report_reader/ews_data_reader"
19
+ require_relative "udise_school_report_reader/location_data_reader"
20
+ require_relative "udise_school_report_reader/official_data_reader"
21
+ require_relative "udise_school_report_reader/rooms_data_reader"
22
+ require_relative "udise_school_report_reader/rte_data_reader"
23
+ require_relative "udise_school_report_reader/sanitation_data_reader"
24
+ require_relative "udise_school_report_reader/teacher_data_reader"
25
+
26
+ # Writers
27
+ require_relative "udise_school_report_reader/csv_writer"
28
+ require_relative "udise_school_report_reader/enrollment_html_writer"
29
+ require_relative "udise_school_report_reader/enrollment_yaml_writer"
30
+ require_relative "udise_school_report_reader/ews_html_writer"
31
+ require_relative "udise_school_report_reader/ews_yaml_writer"
32
+ require_relative "udise_school_report_reader/rte_html_writer"
33
+ require_relative "udise_school_report_reader/rte_yaml_writer"
34
+
35
+ # Main parser
36
+ require_relative "udise_school_report_reader/school_report_parser"
37
+
38
# Namespace module of the gem.
module UdiseSchoolReportReader
  # Error class namespaced under the gem's module; a direct subclass of
  # StandardError, so a bare `rescue` catches it.
  Error = Class.new(StandardError)
end
@@ -0,0 +1,62 @@
1
+ require 'minitest/autorun'
2
+ require 'fileutils'
3
+ require 'yaml'
4
+ require 'udise_school_report_reader'
5
+
6
# Regression test: parses each sample school PDF and compares the extracted
# data with the benchmark YAML stored next to it.
class SchoolReportParserTest < Minitest::Test
  def setup
    @schools = ['carmel-2223', 'jhita-2223', 'kachora-2223', 'sarvo-2223']
  end

  # Compares every school that has both a sample PDF and a benchmark YAML.
  # Skips (instead of silently passing) when no such pair exists, so a run
  # without fixtures is not mistaken for a successful verification.
  def test_yaml_generation
    compared = 0

    @schools.each do |school|
      sample_dir = File.join(File.dirname(__dir__), 'samples', school)
      pdf_path = File.join(sample_dir, "#{school}.pdf")
      yaml_path = File.join(sample_dir, "#{school}.yml")

      # Nothing to compare unless both the PDF and its benchmark exist
      next unless File.exist?(pdf_path) && File.exist?(yaml_path)

      # Generate new data without writing files
      actual_data = UdiseSchoolReportReader::SchoolReportParser.extract_to_text(pdf_path)

      # Compare with benchmark
      expected = YAML.load_file(yaml_path)
      compare_nested_hashes(expected, actual_data, [], school)
      compared += 1
    end

    skip 'No sample PDF/benchmark YAML pairs found under samples/' if compared.zero?
    assert_operator compared, :>, 0
  end

  private

  # Recursively compares two nested Hash/Array structures and fails with the
  # dotted path of the first difference found.
  #
  # expected - benchmark value
  # actual   - value produced by the parser
  # path     - keys/indices leading to the current position (default: [])
  # school   - sample name, used only in failure messages
  def compare_nested_hashes(expected, actual, path = [], school)
    return if expected == actual

    if expected.is_a?(Hash) && actual.is_a?(Hash)
      # Report all missing / extra keys at this level in one failure
      missing = expected.keys - actual.keys
      unless missing.empty?
        flunk "Missing key '#{missing.map { |key| (path + [key]).join('.') }.join("', '")}' in #{school}"
      end

      extra = actual.keys - expected.keys
      unless extra.empty?
        flunk "Extra key '#{extra.map { |key| (path + [key]).join('.') }.join("', '")}' in #{school}"
      end

      # Key sets are identical here; descend into each value
      expected.each_key do |key|
        compare_nested_hashes(expected[key], actual[key], path + [key], school)
      end
    elsif expected.is_a?(Array) && actual.is_a?(Array)
      if expected.length != actual.length
        flunk "Array length mismatch at '#{path.join('.')}' in #{school}. Expected #{expected.length}, got #{actual.length}"
      end
      expected.each_with_index do |exp_item, idx|
        compare_nested_hashes(exp_item, actual[idx], path + [idx], school)
      end
    else
      assert_equal expected, actual, "Value mismatch at '#{path.join('.')}' in #{school}. Expected #{expected.inspect}, got #{actual.inspect}"
    end
  end
end
metadata ADDED
@@ -0,0 +1,165 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: udise_school_report_reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Syed Fazil Basheer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2025-01-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pdf-reader
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: csv
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '13.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '13.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.0'
97
+ description: This gem provides functionality to read and parse UDISE school reports,
98
+ extracting various data points including teacher information, room details, location
99
+ data, and more.
100
+ email:
101
+ - fazil@fazn.co
102
+ executables: []
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - CHANGELOG.md
107
+ - LICENSE.txt
108
+ - README.md
109
+ - lib/udise_school_report_reader.rb
110
+ - lib/udise_school_report_reader/activities_data_reader.rb
111
+ - lib/udise_school_report_reader/anganwadi_data_reader.rb
112
+ - lib/udise_school_report_reader/basic_info_data_reader.rb
113
+ - lib/udise_school_report_reader/block_rectangle_combiner.rb
114
+ - lib/udise_school_report_reader/building_data_reader.rb
115
+ - lib/udise_school_report_reader/characteristics_reader.rb
116
+ - lib/udise_school_report_reader/csv_writer.rb
117
+ - lib/udise_school_report_reader/data_reader_base.rb
118
+ - lib/udise_school_report_reader/digital_facilities_data_reader.rb
119
+ - lib/udise_school_report_reader/enrollment_data_reader.rb
120
+ - lib/udise_school_report_reader/enrollment_html_writer.rb
121
+ - lib/udise_school_report_reader/enrollment_yaml_writer.rb
122
+ - lib/udise_school_report_reader/ews_data_reader.rb
123
+ - lib/udise_school_report_reader/ews_html_writer.rb
124
+ - lib/udise_school_report_reader/ews_yaml_writer.rb
125
+ - lib/udise_school_report_reader/location_data_reader.rb
126
+ - lib/udise_school_report_reader/official_data_reader.rb
127
+ - lib/udise_school_report_reader/pdf_block_extractor.rb
128
+ - lib/udise_school_report_reader/pdf_content_compressor.rb
129
+ - lib/udise_school_report_reader/pdf_rectangle_extractor.rb
130
+ - lib/udise_school_report_reader/rooms_data_reader.rb
131
+ - lib/udise_school_report_reader/rte_data_reader.rb
132
+ - lib/udise_school_report_reader/rte_html_writer.rb
133
+ - lib/udise_school_report_reader/rte_yaml_writer.rb
134
+ - lib/udise_school_report_reader/sanitation_data_reader.rb
135
+ - lib/udise_school_report_reader/school_report_parser.rb
136
+ - lib/udise_school_report_reader/teacher_data_reader.rb
137
+ - lib/udise_school_report_reader/version.rb
138
+ - test/school_report_parser_test.rb
139
+ homepage: https://github.com/UDISE-Plus/udise-school-report-reader
140
+ licenses:
141
+ - MIT
142
+ metadata:
143
+ homepage_uri: https://github.com/UDISE-Plus/udise-school-report-reader
144
+ source_code_uri: https://github.com/UDISE-Plus/udise-school-report-reader
145
+ changelog_uri: https://github.com/UDISE-Plus/udise-school-report-reader/blob/master/CHANGELOG.md
146
+ post_install_message:
147
+ rdoc_options: []
148
+ require_paths:
149
+ - lib
150
+ required_ruby_version: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ version: 2.6.0
155
+ required_rubygems_version: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ requirements: []
161
+ rubygems_version: 3.5.11
162
+ signing_key:
163
+ specification_version: 4
164
+ summary: A Ruby gem to parse and extract data from UDISE school reports
165
+ test_files: []