DataSure 0.6.3rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. datasure-0.6.3rc1/LICENSE +21 -0
  2. datasure-0.6.3rc1/PKG-INFO +321 -0
  3. datasure-0.6.3rc1/README.md +256 -0
  4. datasure-0.6.3rc1/pyproject.toml +178 -0
  5. datasure-0.6.3rc1/src/datasure/.streamlit/config.toml +80 -0
  6. datasure-0.6.3rc1/src/datasure/.streamlit/secrets.toml +1 -0
  7. datasure-0.6.3rc1/src/datasure/__init__.py +0 -0
  8. datasure-0.6.3rc1/src/datasure/app.py +139 -0
  9. datasure-0.6.3rc1/src/datasure/assets/IPA-primary-color-CMYK.jpg +0 -0
  10. datasure-0.6.3rc1/src/datasure/assets/IPA-primary-full-color-abbreviated.png +0 -0
  11. datasure-0.6.3rc1/src/datasure/assets/LinkedIn Cover IPA20.png +0 -0
  12. datasure-0.6.3rc1/src/datasure/assets/SurveyCTO-Logo-CMYK.png +0 -0
  13. datasure-0.6.3rc1/src/datasure/assets/code.png +0 -0
  14. datasure-0.6.3rc1/src/datasure/assets/datasure_logo.svg +73 -0
  15. datasure-0.6.3rc1/src/datasure/assets/demo_backcheck.csv +31 -0
  16. datasure-0.6.3rc1/src/datasure/assets/demo_survey.csv +133 -0
  17. datasure-0.6.3rc1/src/datasure/assets/hard-disk.png +0 -0
  18. datasure-0.6.3rc1/src/datasure/assets/microsoft_azure.png +0 -0
  19. datasure-0.6.3rc1/src/datasure/assets/python.png +0 -0
  20. datasure-0.6.3rc1/src/datasure/assets/storage.png +0 -0
  21. datasure-0.6.3rc1/src/datasure/checks/__init__.py +0 -0
  22. datasure-0.6.3rc1/src/datasure/checks/backchecks.py +4126 -0
  23. datasure-0.6.3rc1/src/datasure/checks/descriptive.py +4 -0
  24. datasure-0.6.3rc1/src/datasure/checks/duplicates.py +2167 -0
  25. datasure-0.6.3rc1/src/datasure/checks/enumerator.py +2587 -0
  26. datasure-0.6.3rc1/src/datasure/checks/gpschecks.py +2147 -0
  27. datasure-0.6.3rc1/src/datasure/checks/missing.py +1288 -0
  28. datasure-0.6.3rc1/src/datasure/checks/outliers.py +2796 -0
  29. datasure-0.6.3rc1/src/datasure/checks/progress.py +1414 -0
  30. datasure-0.6.3rc1/src/datasure/checks/summary.py +1448 -0
  31. datasure-0.6.3rc1/src/datasure/cli.py +89 -0
  32. datasure-0.6.3rc1/src/datasure/connectors/__init__.py +0 -0
  33. datasure-0.6.3rc1/src/datasure/connectors/local.py +353 -0
  34. datasure-0.6.3rc1/src/datasure/connectors/script.py +0 -0
  35. datasure-0.6.3rc1/src/datasure/connectors/scto.py +1315 -0
  36. datasure-0.6.3rc1/src/datasure/processing/__init__.py +0 -0
  37. datasure-0.6.3rc1/src/datasure/processing/corrections.py +634 -0
  38. datasure-0.6.3rc1/src/datasure/processing/prep.py +924 -0
  39. datasure-0.6.3rc1/src/datasure/utils/__init__.py +0 -0
  40. datasure-0.6.3rc1/src/datasure/utils/cache_utils.py +95 -0
  41. datasure-0.6.3rc1/src/datasure/utils/chart_utils.py +147 -0
  42. datasure-0.6.3rc1/src/datasure/utils/config_utils.py +725 -0
  43. datasure-0.6.3rc1/src/datasure/utils/dataframe_utils.py +157 -0
  44. datasure-0.6.3rc1/src/datasure/utils/duckdb_utils.py +428 -0
  45. datasure-0.6.3rc1/src/datasure/utils/metric_utils.py +19 -0
  46. datasure-0.6.3rc1/src/datasure/utils/navigations_utils.py +141 -0
  47. datasure-0.6.3rc1/src/datasure/utils/onboarding_utils.py +1746 -0
  48. datasure-0.6.3rc1/src/datasure/utils/prep_utils.py +524 -0
  49. datasure-0.6.3rc1/src/datasure/utils/scto_api.py +487 -0
  50. datasure-0.6.3rc1/src/datasure/utils/secure_credentials.py +539 -0
  51. datasure-0.6.3rc1/src/datasure/utils/settings_utils.py +237 -0
  52. datasure-0.6.3rc1/src/datasure/views/config_view.py +169 -0
  53. datasure-0.6.3rc1/src/datasure/views/correction_view.py +1052 -0
  54. datasure-0.6.3rc1/src/datasure/views/import_view.py +480 -0
  55. datasure-0.6.3rc1/src/datasure/views/output_view_template.py +508 -0
  56. datasure-0.6.3rc1/src/datasure/views/prep_view.py +813 -0
  57. datasure-0.6.3rc1/src/datasure/views/start_view.py +341 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Innovations for Poverty Action
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,321 @@
1
+ Metadata-Version: 2.3
2
+ Name: DataSure
3
+ Version: 0.6.3rc1
4
+ Summary: IPA Data Management System Dashboard
5
+ Keywords: data-quality,survey-data,streamlit,monitoring,hfc
6
+ Author: Innovations for Poverty Action
7
+ Author-email: Innovations for Poverty Action <researchsupport@poverty-action.org>
8
+ License: MIT License
9
+
10
+ Copyright (c) 2024 Innovations for Poverty Action
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ Classifier: Development Status :: 4 - Beta
30
+ Classifier: Intended Audience :: Developers
31
+ Classifier: Intended Audience :: Science/Research
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Programming Language :: Python :: 3.12
36
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
37
+ Requires-Dist: streamlit>=1.52.0
38
+ Requires-Dist: pandas>=2.2.2
39
+ Requires-Dist: plotly>=6.2.0
40
+ Requires-Dist: numpy>=2.2.3
41
+ Requires-Dist: openpyxl>=3.1.5
42
+ Requires-Dist: seaborn>=0.13.2
43
+ Requires-Dist: scikit-learn>=1.5.2
44
+ Requires-Dist: geopy>=2.4.1
45
+ Requires-Dist: requests>=2.32.3
46
+ Requires-Dist: streamlit-extras>=0.5.5
47
+ Requires-Dist: pyarrow>=16.0.0
48
+ Requires-Dist: opencv-python>=4.11.0.86
49
+ Requires-Dist: matplotlib>=3.10.0
50
+ Requires-Dist: polars>=1.30.0
51
+ Requires-Dist: duckdb>=1.3.1
52
+ Requires-Dist: pillow>=11.3.0
53
+ Requires-Dist: pydantic>=2.11.7
54
+ Requires-Dist: keyring>=25.6.0
55
+ Requires-Dist: polars-readstat>=0.5.1
56
+ Requires-Dist: fastexcel>=0.14.0
57
+ Requires-Dist: pytest>=8.0.0 ; extra == 'dev'
58
+ Requires-Dist: pytest-cov>=4.1.0 ; extra == 'dev'
59
+ Requires-Dist: ruff>=0.6.4 ; extra == 'dev'
60
+ Requires-Python: >=3.11
61
+ Project-URL: Issues, https://github.com/PovertyAction/datasure/issues
62
+ Project-URL: Source, https://github.com/PovertyAction/datasure
63
+ Provides-Extra: dev
64
+ Description-Content-Type: text/markdown
65
+
66
+ # DataSure
67
+
68
+ **DataSure** is IPA's Data Management System Dashboard - a comprehensive tool for survey data quality monitoring and high-frequency checks (HFCs) in research projects.
69
+
70
+ Built for data managers, survey coordinators, and research teams, DataSure provides real-time monitoring of survey data quality with interactive dashboards, automated checks, and flexible reporting capabilities.
71
+
72
+ ## Key Features
73
+
74
+ - **📊 Data Quality Monitoring**: Real-time dashboards for comprehensive survey data analysis
75
+ - **🔍 Automated Checks**: 10 specialized quality check modules including duplicates, outliers, GPS validation, and missing data analysis
76
+ - **📈 Interactive Visualizations**: Charts and maps for data exploration and quality assessment
77
+ - **🔗 Multi-Source Integration**: Direct SurveyCTO API connection plus CSV/Excel file support
78
+ - **⚙️ Flexible Configuration**: Project-based settings with customizable check parameters
79
+ - **📋 Comprehensive Reporting**: Export capabilities for different audiences and formats
80
+ - **🎯 Enumerator Performance**: Monitor data collection team productivity and quality metrics
81
+
82
+ ## Installation
83
+
84
+ ### Step 1: Install uv from terminal
85
+
86
+ ```bash
87
+ # WINDOWS
88
+ winget install astral-sh.uv
89
+
90
+ # MACOS/LINUX
91
+ brew install uv
92
+ ```
93
+
94
+ ### Step 2: Install datasure with uv
95
+
96
+ ```bash
97
+ # install
98
+ uv tool install datasure
99
+
100
+ # ON WINDOWS: update windows path after installation
101
+ uv tool update-shell
102
+ ```
103
+
104
+ ### Step 3: verify installation
105
+
106
+ ```bash
107
+ datasure --version
108
+ ```
109
+
110
+ ## Getting the latest release
111
+
112
+ ```bash
113
+ # if datasure is already install, get latest version with
114
+ uv tool upgrade datasure
115
+ ```
116
+
117
+ ## Quick Start
118
+
119
+ 1. **Launch the application**:
120
+
121
+ ```bash
122
+ datasure
123
+ ```
124
+
125
+ 2. **Create your first project** and configure data quality checks
126
+
127
+ 3. **Import survey data**:
128
+ - Connect directly to your SurveyCTO server
129
+ - Upload CSV or Excel files from local storage
130
+
131
+ 4. **Monitor data quality** with interactive dashboards organized into specialized check modules
132
+
133
+ 5. **Generate reports** and export results for your research team
134
+
135
+ ## System Requirements
136
+
137
+ - **Python**: Version 3.11 or higher
138
+ - **Operating System**: Windows, macOS, or Linux
139
+ - **Memory**: Minimum 4GB RAM (8GB recommended for large datasets)
140
+ - **Storage**: 1GB free space for application and data cache
141
+ - **Internet**: Required for SurveyCTO integration and updates
142
+
143
+ ## Data Quality Check Modules
144
+
145
+ DataSure includes 10 specialized modules for comprehensive survey data quality monitoring:
146
+
147
+ | Module | Purpose |
148
+ |--------|---------|
149
+ | **Summary** | Overall project progress and completion tracking |
150
+ | **Missing Data** | Identify patterns in incomplete responses |
151
+ | **Duplicates** | Find and manage duplicate survey entries |
152
+ | **GPS Validation** | Verify location data accuracy with interactive maps |
153
+ | **Outliers** | Identify unusual responses requiring review |
154
+ | **Enumerator Performance** | Monitor data collection team productivity |
155
+ | **Progress Tracking** | Real-time survey completion monitoring |
156
+ | **Descriptive Statistics** | Data distribution analysis and summaries |
157
+ | **Back-checks** | Verification workflow support |
158
+ | **Custom Checks** | Configure additional quality checks per project |
159
+
160
+ ## Core Capabilities
161
+
162
+ ### Data Import and Management
163
+
164
+ - **SurveyCTO Integration**: Direct API connection with form metadata and authentication
165
+ - **Local File Support**: CSV and Excel upload with automatic type detection
166
+ - **Multi-Project Organization**: Manage multiple surveys simultaneously
167
+ - **Data Preparation**: Cleaning and transformation workflows
168
+
169
+ ### Interactive Dashboards
170
+
171
+ - **Real-time Monitoring**: Live updates as new data arrives
172
+ - **Customizable Views**: Configure dashboards per project requirements
173
+ - **Export Options**: Generate reports in PDF, Excel, and other formats
174
+ - **Automated Alerts**: Notifications for quality issues requiring attention
175
+
176
+ ### Performance and Scalability
177
+
178
+ - **High-Performance Processing**: DuckDB backend for fast analytical queries
179
+ - **Large Dataset Support**: Optimized for datasets with hundreds of thousands of records
180
+ - **Intelligent Caching**: Reduces processing time and API calls
181
+ - **Cross-Platform Compatibility**: Works on Windows, macOS, and Linux
182
+
183
+ ## Getting Started - Application Usage
184
+
185
+ ### Using DataSure
186
+
187
+ Once DataSure is installed, you can begin monitoring your survey data quality:
188
+
189
+ #### 1. Launch the Application
190
+
191
+ ```bash
192
+ datasure
193
+ ```
194
+
195
+ The web interface will open in your default browser (typically at `http://localhost:8501`).
196
+
197
+ #### 2. Import Data
198
+
199
+ - **Import Data Page**: Start here to connect your data sources
200
+ - **SurveyCTO Integration**: Connect directly to your SurveyCTO server with authentication
201
+ - **Local Files**: Upload CSV or Excel files from your computer
202
+ - **Multiple Datasets**: Import and manage up to 10 datasets per project
203
+
204
+ #### 3. Prepare and Configure
205
+
206
+ - **Prepare Data Page**: Preview your imported datasets in separate tabs
207
+ - **Configure Checks Page**: Set up High-Frequency Checks (HFCs)
208
+ - Enter a page name for your quality monitoring dashboard
209
+ - Select the dataset to analyze
210
+ - Configure check parameters and thresholds
211
+ - Save settings to create your HFC page
212
+
213
+ #### 4. Monitor Data Quality
214
+
215
+ - **HFC Dashboard**: Access your configured quality check page
216
+ - **Interactive Tabs**: Each check type has its own tab (Summary, Missing Data, Duplicates, etc.)
217
+ - **Settings Expanders**: Configure specific parameters for each check
218
+ - **Real-time Updates**: Dashboard refreshes as new data becomes available
219
+
220
+ #### 5. Export and Share
221
+
222
+ - Generate reports for different audiences
223
+ - Export findings in various formats
224
+ - Monitor trends over time
225
+
226
+ ### Command Line Options
227
+
228
+ ```bash
229
+ # Show version information
230
+ datasure --version
231
+
232
+ # Launch with custom host/port
233
+ datasure --host 0.0.0.0 --port 8080
234
+
235
+ # View all available options
236
+ datasure --help
237
+ ```
238
+
239
+ ## Data Storage and Cache
240
+
241
+ DataSure automatically manages data storage and caching for optimal performance:
242
+
243
+ ### Cache Directory Locations
244
+
245
+ - **Development Mode**: `./cache/` (in project root)
246
+ - **Production Mode**:
247
+ - **Windows**: `%APPDATA%/datasure/cache/`
248
+ - **Linux/macOS**: `~/.local/share/datasure/cache/`
249
+
250
+ ### What's Stored
251
+
252
+ - **Project configurations**: HFC page settings and form configurations
253
+ - **Database files**: DuckDB databases for processed survey data
254
+ - **SurveyCTO cache**: Cached form metadata and server connections
255
+ - **User settings**: Check configurations and preferences
256
+
257
+ Cache directories are created automatically - no manual setup required.
258
+
259
+ ## Support and Resources
260
+
261
+ ### Getting Help
262
+
263
+ - **GitHub Issues**: [Report bugs and request features](https://github.com/PovertyAction/datasure/issues)
264
+ - **Email Support**: <researchsupport@poverty-action.org>
265
+ - **Documentation**: See [RELEASENOTES.md](RELEASENOTES.md) for latest updates
266
+
267
+ ### Version Information
268
+
269
+ - **Current Version**: See [RELEASENOTES.md](RELEASENOTES.md) for the latest release information
270
+ - **Version History**: Track all changes and improvements
271
+ - **Upgrade Instructions**: Follow installation commands above to get the latest version
272
+
273
+ ## Contributing
274
+
275
+ We welcome contributions from the research community! DataSure is developed by Innovations for Poverty Action (IPA) with input from data managers and survey coordinators worldwide.
276
+
277
+ ### Ways to Contribute
278
+
279
+ - **Report Issues**: Found a bug or have a feature request? [Open an issue](https://github.com/PovertyAction/datasure/issues)
280
+ - **Suggest Features**: Share ideas for new data quality checks or workflow improvements
281
+ - **Share Use Cases**: Help us understand how DataSure fits into different research workflows
282
+ - **Code Contributions**: Developers can contribute code improvements and new features
283
+
284
+ ### For Developers
285
+
286
+ If you're interested in contributing code or setting up a development environment, see our comprehensive [CONTRIBUTING.md](CONTRIBUTING.md) guide which includes:
287
+
288
+ - Development environment setup
289
+ - Code quality standards and testing requirements
290
+ - Package building and distribution workflows
291
+ - Release process and documentation guidelines
292
+ - Technical architecture and development patterns
293
+
294
+ ### Community Standards
295
+
296
+ - Use clear, descriptive language when reporting issues
297
+ - Follow our code of conduct and treat all contributors with respect
298
+ - Help create a welcoming environment for researchers and developers from all backgrounds
299
+
300
+ ## Authors and Acknowledgments
301
+
302
+ DataSure is developed and maintained by the [**Global Research & Data Science (GRDS)**](https://poverty-action.org/research-support) team at [**Innovations for Poverty Action (IPA)**](https://poverty-action.org/). Contact GRDS at <researchsupport@poverty-action.org>.
303
+
304
+ ### Core Development Team
305
+
306
+ - [Ishmail Azindoo Baako](https://poverty-action.org/people/ishmail-azindoo-baako)
307
+ - [Wesley Kirui](https://poverty-action.org/people/wesley-kirui)
308
+ - [Niall Keleher](https://poverty-action.org/people/niall-keleher)
309
+ - [Dania Ochoa](https://poverty-action.org/people/dania-ochoa)
310
+ - [Laura Lahoz](https://poverty-action.org/people/laura-lahoz)
311
+
312
+ ## License and Contact
313
+
314
+ - **License**: MIT License - see [LICENSE](LICENSE) file for details
315
+ - **Repository**: [https://github.com/PovertyAction/datasure](https://github.com/PovertyAction/datasure)
316
+ - **Organization**: Innovations for Poverty Action (IPA)
317
+ - **Contact**: <researchsupport@poverty-action.org>
318
+
319
+ ---
320
+
321
+ **DataSure** - Ensuring data quality for better research outcomes.
@@ -0,0 +1,256 @@
1
+ # DataSure
2
+
3
+ **DataSure** is IPA's Data Management System Dashboard - a comprehensive tool for survey data quality monitoring and high-frequency checks (HFCs) in research projects.
4
+
5
+ Built for data managers, survey coordinators, and research teams, DataSure provides real-time monitoring of survey data quality with interactive dashboards, automated checks, and flexible reporting capabilities.
6
+
7
+ ## Key Features
8
+
9
+ - **📊 Data Quality Monitoring**: Real-time dashboards for comprehensive survey data analysis
10
+ - **🔍 Automated Checks**: 10 specialized quality check modules including duplicates, outliers, GPS validation, and missing data analysis
11
+ - **📈 Interactive Visualizations**: Charts and maps for data exploration and quality assessment
12
+ - **🔗 Multi-Source Integration**: Direct SurveyCTO API connection plus CSV/Excel file support
13
+ - **⚙️ Flexible Configuration**: Project-based settings with customizable check parameters
14
+ - **📋 Comprehensive Reporting**: Export capabilities for different audiences and formats
15
+ - **🎯 Enumerator Performance**: Monitor data collection team productivity and quality metrics
16
+
17
+ ## Installation
18
+
19
+ ### Step 1: Install uv from terminal
20
+
21
+ ```bash
22
+ # WINDOWS
23
+ winget install astral-sh.uv
24
+
25
+ # MACOS/LINUX
26
+ brew install uv
27
+ ```
28
+
29
+ ### Step 2: Install datasure with uv
30
+
31
+ ```bash
32
+ # install
33
+ uv tool install datasure
34
+
35
+ # ON WINDOWS: update windows path after installation
36
+ uv tool update-shell
37
+ ```
38
+
39
+ ### Step 3: verify installation
40
+
41
+ ```bash
42
+ datasure --version
43
+ ```
44
+
45
+ ## Getting the latest release
46
+
47
+ ```bash
48
+ # if datasure is already install, get latest version with
49
+ uv tool upgrade datasure
50
+ ```
51
+
52
+ ## Quick Start
53
+
54
+ 1. **Launch the application**:
55
+
56
+ ```bash
57
+ datasure
58
+ ```
59
+
60
+ 2. **Create your first project** and configure data quality checks
61
+
62
+ 3. **Import survey data**:
63
+ - Connect directly to your SurveyCTO server
64
+ - Upload CSV or Excel files from local storage
65
+
66
+ 4. **Monitor data quality** with interactive dashboards organized into specialized check modules
67
+
68
+ 5. **Generate reports** and export results for your research team
69
+
70
+ ## System Requirements
71
+
72
+ - **Python**: Version 3.11 or higher
73
+ - **Operating System**: Windows, macOS, or Linux
74
+ - **Memory**: Minimum 4GB RAM (8GB recommended for large datasets)
75
+ - **Storage**: 1GB free space for application and data cache
76
+ - **Internet**: Required for SurveyCTO integration and updates
77
+
78
+ ## Data Quality Check Modules
79
+
80
+ DataSure includes 10 specialized modules for comprehensive survey data quality monitoring:
81
+
82
+ | Module | Purpose |
83
+ |--------|---------|
84
+ | **Summary** | Overall project progress and completion tracking |
85
+ | **Missing Data** | Identify patterns in incomplete responses |
86
+ | **Duplicates** | Find and manage duplicate survey entries |
87
+ | **GPS Validation** | Verify location data accuracy with interactive maps |
88
+ | **Outliers** | Identify unusual responses requiring review |
89
+ | **Enumerator Performance** | Monitor data collection team productivity |
90
+ | **Progress Tracking** | Real-time survey completion monitoring |
91
+ | **Descriptive Statistics** | Data distribution analysis and summaries |
92
+ | **Back-checks** | Verification workflow support |
93
+ | **Custom Checks** | Configure additional quality checks per project |
94
+
95
+ ## Core Capabilities
96
+
97
+ ### Data Import and Management
98
+
99
+ - **SurveyCTO Integration**: Direct API connection with form metadata and authentication
100
+ - **Local File Support**: CSV and Excel upload with automatic type detection
101
+ - **Multi-Project Organization**: Manage multiple surveys simultaneously
102
+ - **Data Preparation**: Cleaning and transformation workflows
103
+
104
+ ### Interactive Dashboards
105
+
106
+ - **Real-time Monitoring**: Live updates as new data arrives
107
+ - **Customizable Views**: Configure dashboards per project requirements
108
+ - **Export Options**: Generate reports in PDF, Excel, and other formats
109
+ - **Automated Alerts**: Notifications for quality issues requiring attention
110
+
111
+ ### Performance and Scalability
112
+
113
+ - **High-Performance Processing**: DuckDB backend for fast analytical queries
114
+ - **Large Dataset Support**: Optimized for datasets with hundreds of thousands of records
115
+ - **Intelligent Caching**: Reduces processing time and API calls
116
+ - **Cross-Platform Compatibility**: Works on Windows, macOS, and Linux
117
+
118
+ ## Getting Started - Application Usage
119
+
120
+ ### Using DataSure
121
+
122
+ Once DataSure is installed, you can begin monitoring your survey data quality:
123
+
124
+ #### 1. Launch the Application
125
+
126
+ ```bash
127
+ datasure
128
+ ```
129
+
130
+ The web interface will open in your default browser (typically at `http://localhost:8501`).
131
+
132
+ #### 2. Import Data
133
+
134
+ - **Import Data Page**: Start here to connect your data sources
135
+ - **SurveyCTO Integration**: Connect directly to your SurveyCTO server with authentication
136
+ - **Local Files**: Upload CSV or Excel files from your computer
137
+ - **Multiple Datasets**: Import and manage up to 10 datasets per project
138
+
139
+ #### 3. Prepare and Configure
140
+
141
+ - **Prepare Data Page**: Preview your imported datasets in separate tabs
142
+ - **Configure Checks Page**: Set up High-Frequency Checks (HFCs)
143
+ - Enter a page name for your quality monitoring dashboard
144
+ - Select the dataset to analyze
145
+ - Configure check parameters and thresholds
146
+ - Save settings to create your HFC page
147
+
148
+ #### 4. Monitor Data Quality
149
+
150
+ - **HFC Dashboard**: Access your configured quality check page
151
+ - **Interactive Tabs**: Each check type has its own tab (Summary, Missing Data, Duplicates, etc.)
152
+ - **Settings Expanders**: Configure specific parameters for each check
153
+ - **Real-time Updates**: Dashboard refreshes as new data becomes available
154
+
155
+ #### 5. Export and Share
156
+
157
+ - Generate reports for different audiences
158
+ - Export findings in various formats
159
+ - Monitor trends over time
160
+
161
+ ### Command Line Options
162
+
163
+ ```bash
164
+ # Show version information
165
+ datasure --version
166
+
167
+ # Launch with custom host/port
168
+ datasure --host 0.0.0.0 --port 8080
169
+
170
+ # View all available options
171
+ datasure --help
172
+ ```
173
+
174
+ ## Data Storage and Cache
175
+
176
+ DataSure automatically manages data storage and caching for optimal performance:
177
+
178
+ ### Cache Directory Locations
179
+
180
+ - **Development Mode**: `./cache/` (in project root)
181
+ - **Production Mode**:
182
+ - **Windows**: `%APPDATA%/datasure/cache/`
183
+ - **Linux/macOS**: `~/.local/share/datasure/cache/`
184
+
185
+ ### What's Stored
186
+
187
+ - **Project configurations**: HFC page settings and form configurations
188
+ - **Database files**: DuckDB databases for processed survey data
189
+ - **SurveyCTO cache**: Cached form metadata and server connections
190
+ - **User settings**: Check configurations and preferences
191
+
192
+ Cache directories are created automatically - no manual setup required.
193
+
194
+ ## Support and Resources
195
+
196
+ ### Getting Help
197
+
198
+ - **GitHub Issues**: [Report bugs and request features](https://github.com/PovertyAction/datasure/issues)
199
+ - **Email Support**: <researchsupport@poverty-action.org>
200
+ - **Documentation**: See [RELEASENOTES.md](RELEASENOTES.md) for latest updates
201
+
202
+ ### Version Information
203
+
204
+ - **Current Version**: See [RELEASENOTES.md](RELEASENOTES.md) for the latest release information
205
+ - **Version History**: Track all changes and improvements
206
+ - **Upgrade Instructions**: Follow installation commands above to get the latest version
207
+
208
+ ## Contributing
209
+
210
+ We welcome contributions from the research community! DataSure is developed by Innovations for Poverty Action (IPA) with input from data managers and survey coordinators worldwide.
211
+
212
+ ### Ways to Contribute
213
+
214
+ - **Report Issues**: Found a bug or have a feature request? [Open an issue](https://github.com/PovertyAction/datasure/issues)
215
+ - **Suggest Features**: Share ideas for new data quality checks or workflow improvements
216
+ - **Share Use Cases**: Help us understand how DataSure fits into different research workflows
217
+ - **Code Contributions**: Developers can contribute code improvements and new features
218
+
219
+ ### For Developers
220
+
221
+ If you're interested in contributing code or setting up a development environment, see our comprehensive [CONTRIBUTING.md](CONTRIBUTING.md) guide which includes:
222
+
223
+ - Development environment setup
224
+ - Code quality standards and testing requirements
225
+ - Package building and distribution workflows
226
+ - Release process and documentation guidelines
227
+ - Technical architecture and development patterns
228
+
229
+ ### Community Standards
230
+
231
+ - Use clear, descriptive language when reporting issues
232
+ - Follow our code of conduct and treat all contributors with respect
233
+ - Help create a welcoming environment for researchers and developers from all backgrounds
234
+
235
+ ## Authors and Acknowledgments
236
+
237
+ DataSure is developed and maintained by the [**Global Research & Data Science (GRDS)**](https://poverty-action.org/research-support) team at [**Innovations for Poverty Action (IPA)**](https://poverty-action.org/). Contact GRDS at <researchsupport@poverty-action.org>.
238
+
239
+ ### Core Development Team
240
+
241
+ - [Ishmail Azindoo Baako](https://poverty-action.org/people/ishmail-azindoo-baako)
242
+ - [Wesley Kirui](https://poverty-action.org/people/wesley-kirui)
243
+ - [Niall Keleher](https://poverty-action.org/people/niall-keleher)
244
+ - [Dania Ochoa](https://poverty-action.org/people/dania-ochoa)
245
+ - [Laura Lahoz](https://poverty-action.org/people/laura-lahoz)
246
+
247
+ ## License and Contact
248
+
249
+ - **License**: MIT License - see [LICENSE](LICENSE) file for details
250
+ - **Repository**: [https://github.com/PovertyAction/datasure](https://github.com/PovertyAction/datasure)
251
+ - **Organization**: Innovations for Poverty Action (IPA)
252
+ - **Contact**: <researchsupport@poverty-action.org>
253
+
254
+ ---
255
+
256
+ **DataSure** - Ensuring data quality for better research outcomes.