udise_school_report_reader 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +20 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +45 -0
  5. data/lib/udise_school_report_reader/activities_data_reader.rb +58 -0
  6. data/lib/udise_school_report_reader/anganwadi_data_reader.rb +22 -0
  7. data/lib/udise_school_report_reader/basic_info_data_reader.rb +29 -0
  8. data/lib/udise_school_report_reader/block_rectangle_combiner.rb +115 -0
  9. data/lib/udise_school_report_reader/building_data_reader.rb +36 -0
  10. data/lib/udise_school_report_reader/characteristics_reader.rb +28 -0
  11. data/lib/udise_school_report_reader/csv_writer.rb +75 -0
  12. data/lib/udise_school_report_reader/data_reader_base.rb +86 -0
  13. data/lib/udise_school_report_reader/digital_facilities_data_reader.rb +42 -0
  14. data/lib/udise_school_report_reader/enrollment_data_reader.rb +136 -0
  15. data/lib/udise_school_report_reader/enrollment_html_writer.rb +81 -0
  16. data/lib/udise_school_report_reader/enrollment_yaml_writer.rb +62 -0
  17. data/lib/udise_school_report_reader/ews_data_reader.rb +118 -0
  18. data/lib/udise_school_report_reader/ews_html_writer.rb +63 -0
  19. data/lib/udise_school_report_reader/ews_yaml_writer.rb +31 -0
  20. data/lib/udise_school_report_reader/location_data_reader.rb +47 -0
  21. data/lib/udise_school_report_reader/official_data_reader.rb +40 -0
  22. data/lib/udise_school_report_reader/pdf_block_extractor.rb +49 -0
  23. data/lib/udise_school_report_reader/pdf_content_compressor.rb +36 -0
  24. data/lib/udise_school_report_reader/pdf_rectangle_extractor.rb +53 -0
  25. data/lib/udise_school_report_reader/rooms_data_reader.rb +36 -0
  26. data/lib/udise_school_report_reader/rte_data_reader.rb +118 -0
  27. data/lib/udise_school_report_reader/rte_html_writer.rb +63 -0
  28. data/lib/udise_school_report_reader/rte_yaml_writer.rb +61 -0
  29. data/lib/udise_school_report_reader/sanitation_data_reader.rb +56 -0
  30. data/lib/udise_school_report_reader/school_report_parser.rb +295 -0
  31. data/lib/udise_school_report_reader/teacher_data_reader.rb +204 -0
  32. data/lib/udise_school_report_reader/version.rb +3 -0
  33. data/lib/udise_school_report_reader.rb +41 -0
  34. data/test/school_report_parser_test.rb +62 -0
  35. metadata +165 -0
@@ -0,0 +1,204 @@
1
+ require_relative 'data_reader_base'
2
+
3
# Builds the 'teachers' section of a parsed UDISE school report.
#
# Simple "label => value" fields are declared in FIELD_MAPPINGS and are read by
# the +read+ implementation reached through +super+ (NOTE(review): presumably
# provided by DataReaderBase, which is defined outside this file — confirm).
# This class then adds the fields that need custom handling: teacher counts per
# class level ("<n>-<level>" lines) and a few professional-qualification counts
# whose labels stand alone on a line.
class TeacherDataReader
  include DataReaderBase

  # Report label => where the value is stored in the result hash and how it is
  # typed/extracted. The option keys (:key_path, :value_type, :extract_pattern,
  # :dynamic_key, ...) are interpreted by the +super+ implementation of +read+.
  FIELD_MAPPINGS = {
    'Regular' => {
      key_path: ['teachers', 'count_by_level', 'regular'],
      value_type: :integer
    },
    'Part-time' => {
      key_path: ['teachers', 'count_by_level', 'part_time'],
      value_type: :integer
    },
    'Contract' => {
      key_path: ['teachers', 'count_by_level', 'contract'],
      value_type: :integer
    },
    'Male' => {
      key_path: ['teachers', 'demographics', 'male'],
      value_type: :integer
    },
    'Female' => {
      key_path: ['teachers', 'demographics', 'female'],
      value_type: :integer
    },
    'Transgender' => {
      key_path: ['teachers', 'demographics', 'transgender'],
      value_type: :integer
    },
    'Below Graduate' => {
      key_path: ['teachers', 'qualifications', 'academic', 'below_graduate'],
      value_type: :integer
    },
    'Graduate' => {
      key_path: ['teachers', 'qualifications', 'academic', 'graduate'],
      value_type: :integer
    },
    'Post Graduate and Above' => {
      key_path: ['teachers', 'qualifications', 'academic', 'post_graduate_and_above'],
      value_type: :integer
    },
    'B.Ed. or Equivalent' => {
      key_path: ['teachers', 'qualifications', 'professional', 'bed'],
      value_type: :integer
    },
    'M.Ed. or Equivalent' => {
      key_path: ['teachers', 'qualifications', 'professional', 'med'],
      value_type: :integer
    },
    'Diploma or Certificate in basic teachers training' => {
      key_path: ['teachers', 'qualifications', 'professional', 'basic_training'],
      value_type: :integer
    },
    'Bachelor of Elementary Education (B.El.Ed.)' => {
      key_path: ['teachers', 'qualifications', 'professional', 'beled'],
      value_type: :integer
    },
    'Diploma/degree in special Education' => {
      key_path: ['teachers', 'qualifications', 'professional', 'special_education'],
      value_type: :integer
    },
    'Teachers Aged above 55' => {
      key_path: ['teachers', 'age_distribution', 'above_55'],
      value_type: :integer
    },
    'Total Teacher Trained in Computer' => {
      key_path: ['teachers', 'training', 'computer_trained'],
      value_type: :integer
    },
    'No. of Total Teacher Received Service Training' => {
      key_path: ['teachers', 'training', 'service', 'total'],
      value_type: :integer
    },
    'Special Training Received' => {
      key_path: ['teachers', 'training', 'special', 'received'],
      value_type: :string
    },
    'Teaching Hours per Week' => {
      key_path: ['teachers', 'workload', 'teaching_hours', 'per_week'],
      value_type: :integer,
      extract_pattern: /(\d+)/
    },
    'Non-Teaching Hours' => {
      key_path: ['teachers', 'workload', 'non_teaching_hours', 'per_week'],
      value_type: :integer,
      extract_pattern: /(\d+)/
    },
    'Total Teacher Involve in Non Teaching Assignment' => {
      key_path: ['teachers', 'assignments', 'non_teaching'],
      value_type: :integer,
      extract_pattern: /^(\d+)$/
    },
    'Subject:' => {
      key_path: ['teachers', 'workload', 'by_subject'],
      value_type: :integer,
      extract_pattern: /^Subject:\s*(.+?)(?:\s*,\s*Teachers:\s*(\d+))?$/,
      dynamic_key: true,
      key_from_match: 1,
      value_from_match: 2,
      key_transform: :downcase
    }
  }

  # Class-level label (the text after "<digits>-") => key under
  # teachers.classes_taught. Labels missing from this table are slugified.
  CLASS_LEVEL_KEYS = {
    'Primary' => 'primary',
    'Up.Pr.' => 'upper_primary',
    'Pr. & Up.Pr.' => 'primary_and_upper_primary',
    'Sec. only' => 'secondary_only',
    'H Sec only.' => 'higher_secondary_only',
    'Up pri and Sec.' => 'upper_primary_and_secondary',
    'Sec and H Sec' => 'secondary_and_higher_secondary',
    'Pre-Primary Only.' => 'pre_primary_only',
    'Pre- Pri & Pri' => 'pre_primary_and_primary'
  }.freeze

  # Whole-line label => key under teachers.qualifications.professional.
  PROFESSIONAL_COUNT_KEYS = {
    'Other' => 'other',
    'None' => 'none',
    'Pursuing any Relevant Professional Course' => 'pursuing_course'
  }.freeze

  private_constant :CLASS_LEVEL_KEYS, :PROFESSIONAL_COUNT_KEYS

  # Extracts teacher data from the report's text lines.
  #
  # @param lines [Array<String>] report lines; a label is expected to be
  #   followed by its value on the next line
  # @return [Hash] a hash of the form <tt>{ 'teachers' => { ... } }</tt>; empty
  #   or nil sections are removed, except 'assignments', which is always kept
  def self.read(lines)
    require 'yaml'
    # NOTE(review): relative path — this is resolved against the process's
    # current working directory, not against the location of this file.
    template = YAML.load_file('template.yml') || {}
    # Fall back to an empty hash so a template without a 'teachers' entry does
    # not cause NoMethodError on nil further down.
    teachers = template['teachers'] || {}
    data = { 'teachers' => teachers }

    # Process base module mappings first
    base_data = super
    merge_base_data(teachers, base_data['teachers']) if base_data&.dig('teachers')

    lines.each_with_index do |line, i|
      next_line = lines[i + 1]&.strip
      # Every field handled here is a label whose value is an all-digit line
      # directly below it; anything else is ignored.
      next unless next_line&.match?(/^\d+$/)

      count = next_line.to_i

      case line
      when /^(\d+)-(.+)$/
        category = Regexp.last_match(2).strip
        key = CLASS_LEVEL_KEYS.fetch(category) do
          category.downcase.gsub(/[^a-z0-9]+/, '_').gsub(/^_|_$/, '')
        end
        # Create the container on demand in case the template does not define it.
        (teachers['classes_taught'] ||= {})[key] = count
      else
        field = PROFESSIONAL_COUNT_KEYS[line]
        professional_section(teachers)[field] = count if field
      end
    end

    prune_empty_sections(teachers)
    data
  end

  # Copies the values read via FIELD_MAPPINGS (+base+) over the template
  # defaults in +teachers+. Only truthy values are copied, so template entries
  # are kept wherever the report did not supply a value.
  def self.merge_base_data(teachers, base)
    %w[count_by_level demographics age_distribution].each do |section|
      teachers[section] = base[section] if base[section]
    end

    # 'assignments' is always present in the output, even when empty.
    teachers['assignments'] ||= {}
    non_teaching = base['assignments']&.dig('non_teaching')
    teachers['assignments']['non_teaching'] = non_teaching if non_teaching

    if (qualifications = base['qualifications'])
      teachers['qualifications'] ||= {}
      %w[academic professional].each do |kind|
        teachers['qualifications'][kind] = qualifications[kind] if qualifications[kind]
      end
    end

    if (training = base['training'])
      target = (teachers['training'] ||= {})
      target['computer_trained'] = training['computer_trained'] if training['computer_trained']

      target['service'] ||= {}
      service_total = training['service']&.dig('total')
      target['service']['total'] = service_total if service_total

      target['special'] ||= {}
      special_received = training['special']&.dig('received')
      target['special']['received'] = special_received if special_received
    end

    if (workload = base['workload'])
      teachers['workload'] ||= {}
      %w[teaching_hours non_teaching_hours by_subject].each do |part|
        teachers['workload'][part] = workload[part] if workload[part]
      end
    end
  end

  # Returns teachers.qualifications.professional, creating the nested hashes
  # when the template or base data did not provide them.
  def self.professional_section(teachers)
    qualifications = (teachers['qualifications'] ||= {})
    qualifications['professional'] ||= {}
  end

  # Removes nil values and empty hashes, one level inside each section and then
  # at the top level of +teachers+. The 'assignments' section is left alone.
  def self.prune_empty_sections(teachers)
    teachers.each do |key, section|
      next unless section.is_a?(Hash)
      # Don't clean up assignments section
      next if key == 'assignments'

      section.reject! { |_, value| empty_value?(value) }
    end
    teachers.reject! { |key, value| key != 'assignments' && empty_value?(value) }
  end

  # True for nil and for empty hashes.
  def self.empty_value?(value)
    value.nil? || (value.is_a?(Hash) && value.empty?)
  end

  private_class_method :merge_base_data, :professional_section,
                       :prune_empty_sections, :empty_value?
end
@@ -0,0 +1,3 @@
1
module UdiseSchoolReportReader
  # Version string of this gem. Frozen so the constant cannot be mutated in
  # place by code that receives it.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,41 @@
1
+ require "udise_school_report_reader/version"
2
+
3
+ # Core functionality
4
+ require_relative "udise_school_report_reader/pdf_block_extractor"
5
+ require_relative "udise_school_report_reader/pdf_rectangle_extractor"
6
+ require_relative "udise_school_report_reader/pdf_content_compressor"
7
+ require_relative "udise_school_report_reader/block_rectangle_combiner"
8
+
9
+ # Data readers
10
+ require_relative "udise_school_report_reader/data_reader_base"
11
+ require_relative "udise_school_report_reader/activities_data_reader"
12
+ require_relative "udise_school_report_reader/anganwadi_data_reader"
13
+ require_relative "udise_school_report_reader/basic_info_data_reader"
14
+ require_relative "udise_school_report_reader/building_data_reader"
15
+ require_relative "udise_school_report_reader/characteristics_reader"
16
+ require_relative "udise_school_report_reader/digital_facilities_data_reader"
17
+ require_relative "udise_school_report_reader/enrollment_data_reader"
18
+ require_relative "udise_school_report_reader/ews_data_reader"
19
+ require_relative "udise_school_report_reader/location_data_reader"
20
+ require_relative "udise_school_report_reader/official_data_reader"
21
+ require_relative "udise_school_report_reader/rooms_data_reader"
22
+ require_relative "udise_school_report_reader/rte_data_reader"
23
+ require_relative "udise_school_report_reader/sanitation_data_reader"
24
+ require_relative "udise_school_report_reader/teacher_data_reader"
25
+
26
+ # Writers
27
+ require_relative "udise_school_report_reader/csv_writer"
28
+ require_relative "udise_school_report_reader/enrollment_html_writer"
29
+ require_relative "udise_school_report_reader/enrollment_yaml_writer"
30
+ require_relative "udise_school_report_reader/ews_html_writer"
31
+ require_relative "udise_school_report_reader/ews_yaml_writer"
32
+ require_relative "udise_school_report_reader/rte_html_writer"
33
+ require_relative "udise_school_report_reader/rte_yaml_writer"
34
+
35
+ # Main parser
36
+ require_relative "udise_school_report_reader/school_report_parser"
37
+
38
# Top-level namespace of the gem. The files required above define the
# extractors, data readers, writers and the main parser.
module UdiseSchoolReportReader
  # Generic error type defined under the gem's namespace; a plain
  # StandardError subclass with no extra behaviour.
  class Error < StandardError
  end
end
@@ -0,0 +1,62 @@
1
+ require 'minitest/autorun'
2
+ require 'fileutils'
3
+ require 'yaml'
4
+ require 'udise_school_report_reader'
5
+
6
# Regression test: runs the parser on sample school PDFs and compares the
# result, key by key, with a stored benchmark YAML file for the same school.
class SchoolReportParserTest < Minitest::Test
  def setup
    @schools = ['carmel-2223', 'jhita-2223', 'kachora-2223', 'sarvo-2223']
  end

  # Compares parser output with the benchmark for every school that has both a
  # sample PDF and a benchmark YAML. Schools missing either file are passed
  # over; if no school at all could be verified the test is reported as
  # skipped instead of passing without having checked anything.
  def test_yaml_generation
    samples_root = File.join(File.dirname(__dir__), 'samples')

    verified = @schools.count do |school|
      pdf_path = File.join(samples_root, school, "#{school}.pdf")
      yaml_path = File.join(samples_root, school, "#{school}.yml")

      # Nothing to compare unless both the input PDF and its benchmark exist.
      next false unless File.exist?(pdf_path) && File.exist?(yaml_path)

      # Run the parser on the sample PDF
      actual_data = UdiseSchoolReportReader::SchoolReportParser.extract_to_text(pdf_path)

      # Compare with benchmark
      expected = YAML.load_file(yaml_path)
      compare_nested_hashes(expected, actual_data, [], school)
      true
    end

    skip "No sample PDF/benchmark YAML pairs found under #{samples_root}; nothing was verified" if verified.zero?

    # compare_nested_hashes only asserts when it finds a difference, so record
    # one explicit assertion for the successful case.
    pass
  end

  private

  # Recursively compares +expected+ with +actual+ and fails the test at the
  # first difference, naming the dotted path of the offending entry.
  #
  # @param expected [Object] benchmark value (Hash, Array or scalar)
  # @param actual [Object] value produced by the parser
  # @param path [Array] keys/indices leading to the values being compared
  # @param school [String] school identifier used in failure messages
  def compare_nested_hashes(expected, actual, path = [], school)
    return if expected == actual

    if expected.is_a?(Hash) && actual.is_a?(Hash)
      # Check for missing keys
      (expected.keys - actual.keys).each do |key|
        flunk "Missing key '#{(path + [key]).join('.')}' in #{school}"
      end

      # Check for extra keys
      (actual.keys - expected.keys).each do |key|
        flunk "Extra key '#{(path + [key]).join('.')}' in #{school}"
      end

      # Compare values for common keys
      (expected.keys & actual.keys).each do |key|
        compare_nested_hashes(expected[key], actual[key], path + [key], school)
      end
    elsif expected.is_a?(Array) && actual.is_a?(Array)
      if expected.length != actual.length
        flunk "Array length mismatch at '#{path.join('.')}' in #{school}. Expected #{expected.length}, got #{actual.length}"
      end

      expected.zip(actual).each_with_index do |(exp_item, act_item), idx|
        compare_nested_hashes(exp_item, act_item, path + [idx], school)
      end
    else
      assert_equal expected, actual, "Value mismatch at '#{path.join('.')}' in #{school}. Expected #{expected.inspect}, got #{actual.inspect}"
    end
  end
end
metadata ADDED
@@ -0,0 +1,165 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: udise_school_report_reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Syed Fazil Basheer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2025-01-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pdf-reader
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: csv
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '13.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '13.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.0'
97
+ description: This gem provides functionality to read and parse UDISE school reports,
98
+ extracting various data points including teacher information, room details, location
99
+ data, and more.
100
+ email:
101
+ - fazil@fazn.co
102
+ executables: []
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - CHANGELOG.md
107
+ - LICENSE.txt
108
+ - README.md
109
+ - lib/udise_school_report_reader.rb
110
+ - lib/udise_school_report_reader/activities_data_reader.rb
111
+ - lib/udise_school_report_reader/anganwadi_data_reader.rb
112
+ - lib/udise_school_report_reader/basic_info_data_reader.rb
113
+ - lib/udise_school_report_reader/block_rectangle_combiner.rb
114
+ - lib/udise_school_report_reader/building_data_reader.rb
115
+ - lib/udise_school_report_reader/characteristics_reader.rb
116
+ - lib/udise_school_report_reader/csv_writer.rb
117
+ - lib/udise_school_report_reader/data_reader_base.rb
118
+ - lib/udise_school_report_reader/digital_facilities_data_reader.rb
119
+ - lib/udise_school_report_reader/enrollment_data_reader.rb
120
+ - lib/udise_school_report_reader/enrollment_html_writer.rb
121
+ - lib/udise_school_report_reader/enrollment_yaml_writer.rb
122
+ - lib/udise_school_report_reader/ews_data_reader.rb
123
+ - lib/udise_school_report_reader/ews_html_writer.rb
124
+ - lib/udise_school_report_reader/ews_yaml_writer.rb
125
+ - lib/udise_school_report_reader/location_data_reader.rb
126
+ - lib/udise_school_report_reader/official_data_reader.rb
127
+ - lib/udise_school_report_reader/pdf_block_extractor.rb
128
+ - lib/udise_school_report_reader/pdf_content_compressor.rb
129
+ - lib/udise_school_report_reader/pdf_rectangle_extractor.rb
130
+ - lib/udise_school_report_reader/rooms_data_reader.rb
131
+ - lib/udise_school_report_reader/rte_data_reader.rb
132
+ - lib/udise_school_report_reader/rte_html_writer.rb
133
+ - lib/udise_school_report_reader/rte_yaml_writer.rb
134
+ - lib/udise_school_report_reader/sanitation_data_reader.rb
135
+ - lib/udise_school_report_reader/school_report_parser.rb
136
+ - lib/udise_school_report_reader/teacher_data_reader.rb
137
+ - lib/udise_school_report_reader/version.rb
138
+ - test/school_report_parser_test.rb
139
+ homepage: https://github.com/UDISE-Plus/udise-school-report-reader
140
+ licenses:
141
+ - MIT
142
+ metadata:
143
+ homepage_uri: https://github.com/UDISE-Plus/udise-school-report-reader
144
+ source_code_uri: https://github.com/UDISE-Plus/udise-school-report-reader
145
+ changelog_uri: https://github.com/UDISE-Plus/udise-school-report-reader/blob/master/CHANGELOG.md
146
+ post_install_message:
147
+ rdoc_options: []
148
+ require_paths:
149
+ - lib
150
+ required_ruby_version: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ version: 2.6.0
155
+ required_rubygems_version: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ requirements: []
161
+ rubygems_version: 3.5.11
162
+ signing_key:
163
+ specification_version: 4
164
+ summary: A Ruby gem to parse and extract data from UDISE school reports
165
+ test_files: []